2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Static VLC tables shared by all decoder instances (their init code is not
 * visible in this extract).  The names match H.264 CAVLC syntax elements
 * (coeff_token, total_zeros, run) — presumably used for residual parsing;
 * each VLC_TYPE array is the backing storage for the matching VLC and the
 * companion "_size" constant gives that storage's length. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* separate, smaller coeff_token table for chroma DC */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
/* NOTE(review): the VLC declaration for run7 (original line ~71) is missing
 * from this extract; only its backing table is visible. */
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 for QP values 0..51 — table lookup avoids a runtime
 * modulo.  NOTE(review): the closing "};" was elided from this extract;
 * restored. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* div6[q] == q / 6 for QP values 0..51 — table lookup avoids a runtime
 * division.  NOTE(review): the closing "};" was elided from this extract;
 * restored. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/* Left-neighbour 4x4-block index remapping tables, selected by
 * fill_caches() for the different MBAFF field/frame neighbour combinations
 * (indices [0], [3] and [2 - bottom] are used below).
 * NOTE(review): the four initializer rows are missing from this extract
 * (embedded line numbers jump from 97 to 104); restore from the upstream
 * source before compiling. */
97 static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock neighbour caches — intra4x4 prediction modes,
 * non-zero-coefficient counts, mv/ref/mvd caches and (for B slices) the
 * direct-mode cache — from already-decoded neighbouring macroblocks,
 * including MBAFF field/frame neighbour selection.
 * @param mb_type     the current macroblock's type flags
 * @param for_deblock nonzero when caches are filled for the loop filter;
 *                    neighbour availability then uses slice_table < 0xFFFF
 *                    instead of slice_num equality (see below)
 * NOTE(review): this extract is decimated — the original line numbers
 * embedded at the start of each line jump, so else-branches, closing braces
 * and several statements/macro openers are missing.  Comments below
 * describe only the visible code; restore the full function from upstream
 * before compiling.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
/* top neighbour is one mb row up; the shift doubles the stride in field
 * pictures (FIELD_PICTURE) */
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* default (non-MBAFF) neighbour coordinates */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF path: neighbours are chosen per macroblock *pair*, adjusted by the
 * field/frame coding of each neighbour pair relative to the current MB */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
139 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
140 top_xy -= s->mb_stride;
142 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
143 topleft_xy -= s->mb_stride;
144 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
145 topleft_xy += s->mb_stride;
146 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
147 topleft_partition = 0;
149 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
150 topright_xy -= s->mb_stride;
152 if (left_mb_field_flag != curr_mb_field_flag) {
153 left_xy[1] = left_xy[0] = pair_xy - 1;
154 if (curr_mb_field_flag) {
155 left_xy[1] += s->mb_stride;
156 left_block = left_block_options[3];
158 left_block= left_block_options[2 - bottom];
/* publish resolved neighbour addresses for later use */
163 h->top_mb_xy = top_xy;
164 h->left_mb_xy[0] = left_xy[0];
165 h->left_mb_xy[1] = left_xy[1];
/* for_deblock: any already-decoded neighbour counts as available
 * (slice_table entries below 0xFFFF) */
169 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
170 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
171 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
173 if(MB_MBAFF && !IS_INTRA(mb_type)){
175 for(list=0; list<h->list_count; list++){
176 //These values where changed for ease of performing MC, we need to change them back
177 //FIXME maybe we can make MC and loop filter use the same values or prevent
178 //the MC code from changing ref_cache and rather use a temporary array.
179 if(USES_LIST(mb_type,list)){
180 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
181 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
182 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
184 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
185 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* normal (decode) path: a neighbour is available only if it belongs to the
 * same slice (slice_num equality) */
190 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
191 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
192 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
193 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
194 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra: compute bitmasks of which neighbouring sample groups are usable
 * (constrained_intra_pred restricts to intra-coded neighbours) */
196 if(IS_INTRA(mb_type)){
197 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
198 h->topleft_samples_available=
199 h->top_samples_available=
200 h->left_samples_available= 0xFFFF;
201 h->topright_samples_available= 0xEEEA;
203 if(!(top_type & type_mask)){
204 h->topleft_samples_available= 0xB3FF;
205 h->top_samples_available= 0x33FF;
206 h->topright_samples_available= 0x26EA;
208 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
209 if(IS_INTERLACED(mb_type)){
210 if(!(left_type[0] & type_mask)){
211 h->topleft_samples_available&= 0xDFFF;
212 h->left_samples_available&= 0x5FFF;
214 if(!(left_type[1] & type_mask)){
215 h->topleft_samples_available&= 0xFF5F;
216 h->left_samples_available&= 0xFF5F;
219 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
220 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
221 assert(left_xy[0] == left_xy[1]);
222 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
223 h->topleft_samples_available&= 0xDF5F;
224 h->left_samples_available&= 0x5F5F;
228 if(!(left_type[0] & type_mask)){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(topleft_type & type_mask))
235 h->topleft_samples_available&= 0x7FFF;
237 if(!(topright_type & type_mask))
238 h->topright_samples_available&= 0xFBFF;
/* cache the intra4x4 prediction modes of the top/left neighbours */
240 if(IS_INTRA4x4(mb_type)){
241 if(IS_INTRA4x4(top_type)){
242 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
243 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
244 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
245 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
248 if(!(top_type & type_mask))
253 h->intra4x4_pred_mode_cache[4+8*0]=
254 h->intra4x4_pred_mode_cache[5+8*0]=
255 h->intra4x4_pred_mode_cache[6+8*0]=
256 h->intra4x4_pred_mode_cache[7+8*0]= pred;
259 if(IS_INTRA4x4(left_type[i])){
260 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
261 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
264 if(!(left_type[i] & type_mask))
269 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
270 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* cache non-zero-coefficient counts of the top/left neighbours (64 marks
 * "unknown/unavailable" except in the CABAC inter case, which uses 0) */
286 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
288 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
289 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
290 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
291 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
293 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
294 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
296 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
297 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
300 h->non_zero_count_cache[4+8*0]=
301 h->non_zero_count_cache[5+8*0]=
302 h->non_zero_count_cache[6+8*0]=
303 h->non_zero_count_cache[7+8*0]=
305 h->non_zero_count_cache[1+8*0]=
306 h->non_zero_count_cache[2+8*0]=
308 h->non_zero_count_cache[1+8*3]=
309 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
313 for (i=0; i<2; i++) {
315 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
316 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
317 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
318 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
320 h->non_zero_count_cache[3+8*1 + 2*8*i]=
321 h->non_zero_count_cache[3+8*2 + 2*8*i]=
322 h->non_zero_count_cache[0+8*1 + 8*i]=
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* neighbour coded-block-pattern bits (top and left) */
330 h->top_cbp = h->cbp_table[top_xy];
331 } else if(IS_INTRA(mb_type)) {
338 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
339 } else if(IS_INTRA(mb_type)) {
345 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
348 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter/direct: load neighbour motion vectors and reference indices into
 * mv_cache/ref_cache (LIST_NOT_USED / PART_NOT_AVAILABLE as fillers) */
353 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
355 for(list=0; list<h->list_count; list++){
356 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
357 /*if(!h->mv_cache_clean[list]){
358 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
359 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
360 h->mv_cache_clean[list]= 1;
364 h->mv_cache_clean[list]= 0;
366 if(USES_LIST(top_type, list)){
367 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
368 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
369 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
370 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
371 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
373 h->ref_cache[list][scan8[0] + 0 - 1*8]=
374 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
375 h->ref_cache[list][scan8[0] + 2 - 1*8]=
376 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
378 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
379 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
380 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
382 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
386 int cache_idx = scan8[0] - 1 + i*2*8;
387 if(USES_LIST(left_type[i], list)){
388 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
389 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
390 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
391 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
392 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
393 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
395 *(uint32_t*)h->mv_cache [list][cache_idx ]=
396 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
397 h->ref_cache[list][cache_idx ]=
398 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
405 if(USES_LIST(topleft_type, list)){
406 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
407 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
408 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
409 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
415 if(USES_LIST(topright_type, list)){
416 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
417 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
418 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
421 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
428 h->ref_cache[list][scan8[5 ]+1] =
429 h->ref_cache[list][scan8[7 ]+1] =
430 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
431 h->ref_cache[list][scan8[4 ]] =
432 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
433 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
434 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
435 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
436 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
437 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
440 /* XXX beurk, Load mvd */
441 if(USES_LIST(top_type, list)){
442 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
445 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
448 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
449 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
450 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
453 if(USES_LIST(left_type[0], list)){
454 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
461 if(USES_LIST(left_type[1], list)){
462 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
463 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
466 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
469 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
470 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
471 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
472 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
473 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the direct-mode flags of the top/left neighbours */
475 if(h->slice_type_nos == FF_B_TYPE){
476 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
478 if(IS_DIRECT(top_type)){
479 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
480 }else if(IS_8X8(top_type)){
481 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
482 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
483 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
488 if(IS_DIRECT(left_type[0]))
489 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
490 else if(IS_8X8(left_type[0]))
491 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
493 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
495 if(IS_DIRECT(left_type[1]))
496 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
497 else if(IS_8X8(left_type[1]))
498 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
500 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: remap neighbour mv/ref values between frame and field coding.
 * NOTE(review): the #define opener of the index list below is elided from
 * this extract; the MAP_F2F bodies halve/double the vertical mv component
 * and shift the ref index accordingly. */
506 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
507 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
508 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
509 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
510 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
512 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
513 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
514 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
515 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
517 #define MAP_F2F(idx, mb_type)\
518 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
519 h->ref_cache[list][idx] <<= 1;\
520 h->mv_cache[list][idx][1] /= 2;\
521 h->mvd_cache[list][idx][1] /= 2;\
526 #define MAP_F2F(idx, mb_type)\
527 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] >>= 1;\
529 h->mv_cache[list][idx][1] <<= 1;\
530 h->mvd_cache[list][idx][1] <<= 1;\
/* record whether the top/left neighbours use the 8x8 DCT (0..2) */
540 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the cached intra4x4 prediction modes back into the per-macroblock
 * intra4x4_pred_mode array so later MBs can read this MB as their neighbour:
 * entries [0..3] come from cache column 7, rows 1..4; entries [4..6] from
 * row 4, columns 4..6.  NOTE(review): the closing brace is elided from this
 * extract.
 */
543 static inline void write_back_intra_pred_mode(H264Context *h){
544 const int mb_xy= h->mb_xy;
546 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
547 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
548 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
549 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
550 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
551 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
552 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Validates each cached intra4x4 prediction mode against the availability
 * of top/left neighbour samples: DC modes are remapped via the top[]/left[]
 * tables to the variant that only uses available samples, and a mode that
 * requires missing samples maps to a negative status (the error return
 * itself is elided from this extract -- presumably -1 after the av_log).
 */
556 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
558 static inline int check_intra4x4_pred_mode(H264Context *h){
559 MpegEncContext * const s = &h->s;
560 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
561 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* bit 0x8000 of top_samples_available == top row of the first 4x4 block */
564 if(!(h->top_samples_available&0x8000)){
566 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
568 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
571 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
576 if((h->left_samples_available&0x8888)!=0x8888){
577 static const int mask[4]={0x8000,0x2000,0x80,0x20};
579 if(!(h->left_samples_available&mask[i])){
580 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
582 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
585 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 } //FIXME cleanup like next
/**
 * Validates a whole-macroblock (16x16 luma / 8x8 chroma) intra prediction
 * mode against neighbour-sample availability, remapping DC modes via the
 * top[]/left[] tables; out-of-range modes are rejected with an error log.
 * The MBAFF + constrained_intra_pred special case picks an ALZHEIMER_*
 * mode when only one of the two left field halves is available.
 * NOTE(review): the mode range check and return statements are elided
 * from this extract.
 */
595 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
597 static inline int check_intra_pred_mode(H264Context *h, int mode){
598 MpegEncContext * const s = &h->s;
599 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
600 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
603 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
607 if(!(h->top_samples_available&0x8000)){
610 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 if((h->left_samples_available&0x8080) != 0x8080){
617 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
618 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
621 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * Returns the predicted intra4x4 mode for block n: the minimum of the
 * left and top neighbours' cached modes, falling back to DC_PRED when
 * either neighbour is unavailable (negative cache value).
 * NOTE(review): the non-DC return path is elided from this extract.
 */
630 * gets the predicted intra4x4 prediction mode.
632 static inline int pred_intra_mode(H264Context *h, int n){
633 const int index8= scan8[n];
634 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
635 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
636 const int min= FFMIN(left, top);
638 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
640 if(min<0) return DC_PRED;
/**
 * Copies the cached non-zero-coefficient counts back into the
 * per-macroblock non_zero_count array: luma entries [0..6] from the right
 * column / bottom row of the cache, then the chroma entries [7..12].
 * NOTE(review): the closing brace is elided from this extract.
 */
644 static inline void write_back_non_zero_count(H264Context *h){
645 const int mb_xy= h->mb_xy;
647 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
648 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
649 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
650 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
651 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
652 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
653 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
655 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
656 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
657 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
659 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
660 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
661 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/**
 * Predicts the number of non-zero coefficients for block n from the left
 * and top neighbour counts.  NOTE(review): the initialization of 'i'
 * (presumably left + top) and the availability handling are elided from
 * this extract; the visible line rounds an averaged sum when both
 * neighbours contributed (i < 64).
 */
665 * gets the predicted number of non-zero coefficients.
666 * @param n block index
668 static inline int pred_non_zero_count(H264Context *h, int n){
669 const int index8= scan8[n];
670 const int left= h->non_zero_count_cache[index8 - 1];
671 const int top = h->non_zero_count_cache[index8 - 8];
674 if(i<64) i= (i+1)>>1;
676 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "diagonal" (top-right) neighbour C for motion vector
 * prediction: returns its reference index and points *C at its mv.  Falls
 * back to the top-left neighbour when the top-right partition is not
 * available.  Under MBAFF, the SET_DIAG_MV macro re-reads the neighbour
 * directly from the picture and rescales mv[1]/ref between field and frame
 * coding (*2/>>1 or /2/<<1).
 * NOTE(review): several lines (returns, #if/#endif around the MBAFF path)
 * are elided from this extract.
 */
681 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
682 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
683 MpegEncContext *s = &h->s;
685 /* there is no consistent mapping of mvs to neighboring locations that will
686 * make mbaff happy, so we can't move all this logic to fill_caches */
688 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
690 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
691 *C = h->mv_cache[list][scan8[0]-2];
694 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
695 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
696 if(IS_INTERLACED(mb_types[topright_xy])){
697 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
698 const int x4 = X4, y4 = Y4;\
699 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
700 if(!USES_LIST(mb_type,list))\
701 return LIST_NOT_USED;\
702 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
703 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
704 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
705 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
707 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
710 if(topright_ref == PART_NOT_AVAILABLE
711 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
712 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
714 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
715 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
718 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
720 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
721 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF fast path: use the cached top-right, else fall back to the
 * top-left neighbour (i - 8 - 1) */
727 if(topright_ref != PART_NOT_AVAILABLE){
728 *C= h->mv_cache[list][ i - 8 + part_width ];
731 tprintf(s->avctx, "topright MV not available\n");
733 *C= h->mv_cache[list][ i - 8 - 1 ];
734 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * Median motion vector prediction: with A = left, B = top, C = diagonal
 * neighbour, the prediction is the componentwise median when more than one
 * neighbour shares the target reference index; a single matching neighbour
 * is used directly (H.264 spec 8.4.1.3 style).
 * NOTE(review): the single-match branches and special-case handling between
 * the visible lines are partly elided from this extract.
 */
739 * gets the predicted MV.
740 * @param n the block index
741 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
742 * @param mx the x component of the predicted motion vector
743 * @param my the y component of the predicted motion vector
745 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
746 const int index8= scan8[n];
747 const int top_ref= h->ref_cache[list][ index8 - 8 ];
748 const int left_ref= h->ref_cache[list][ index8 - 1 ];
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
752 int diagonal_ref, match_count;
754 assert(part_width==1 || part_width==2 || part_width==4);
764 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
765 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
766 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
767 if(match_count > 1){ //most common
768 *mx= mid_pred(A[0], B[0], C[0]);
769 *my= mid_pred(A[1], B[1], C[1]);
770 }else if(match_count==1){
774 }else if(top_ref==ref){
/* only the left neighbour exists: use A directly (branch partly elided);
 * otherwise median of all three */
782 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
786 *mx= mid_pred(A[0], B[0], C[0]);
787 *my= mid_pred(A[1], B[1], C[1]);
791 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * Directional MV prediction for 16x8 partitions: the top partition prefers
 * the top neighbour B, the bottom partition prefers the left neighbour A
 * (when their reference matches); otherwise falls back to the generic
 * median predictor pred_motion().
 * NOTE(review): the ref-match tests and returns between the visible lines
 * are elided from this extract.
 */
795 * gets the directionally predicted 16x8 MV.
796 * @param n the block index
797 * @param mx the x component of the predicted motion vector
798 * @param my the y component of the predicted motion vector
800 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
802 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
803 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
805 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
814 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: generic median prediction */
826 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * Directional MV prediction for 8x16 partitions: the left partition prefers
 * the left neighbour A, the right partition prefers the diagonal neighbour
 * C (when their reference matches); otherwise falls back to the generic
 * median predictor pred_motion().
 * NOTE(review): the ref-match tests and returns between the visible lines
 * are elided from this extract.
 */
830 * gets the directionally predicted 8x16 MV.
831 * @param n the block index
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
835 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
837 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
838 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
840 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
851 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
853 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
855 if(diagonal_ref == ref){
/* fallback: generic median prediction */
863 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * P-Skip MV prediction: the skip vector is zero when the top or left
 * neighbour is unavailable, or when either has ref 0 with a zero mv
 * (the zero-assignment/return in that branch is elided from this extract);
 * otherwise the regular median predictor for the whole 16x16 MB is used.
 */
866 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
867 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
868 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
870 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
872 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
873 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
874 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
880 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Temporal-direct distance scale factor for reference i of list 0:
 * td/tb are the clipped POC distances, tx approximates 16384/td with
 * rounding, and the result (tb*tx + 32) >> 6 is clipped to [-1024,1023].
 * NOTE(review): the early-return for td==0 / long-term references
 * (presumably 'return 256;') and the else-brace are elided from this
 * extract.
 */
885 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
886 int poc0 = h->ref_list[0][i].poc;
887 int td = av_clip(poc1 - poc0, -128, 127);
888 if(td == 0 || h->ref_list[0][i].long_ref){
891 int tb = av_clip(poc - poc0, -128, 127);
892 int tx = (16384 + (FFABS(td) >> 1)) / td;
893 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the dist_scale_factor tables used by temporal direct mode: the
 * frame-level table over h->ref_count[0] entries, plus (for MBAFF, the
 * surrounding condition being elided from this extract) a per-field table
 * indexed with field parity, using field POCs and the +16 field reference
 * offset.
 */
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 MpegEncContext * const s = &h->s;
899 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
900 const int poc1 = h->ref_list[1][0].poc;
902 for(field=0; field<2; field++){
903 const int poc = h->s.current_picture_ptr->field_poc[field];
904 const int poc1 = h->ref_list[1][0].field_poc[field];
905 for(i=0; i < 2*h->ref_count[0]; i++)
906 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
909 for(i=0; i<h->ref_count[0]; i++){
910 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the map from the co-located picture's (ref_list[1][0]) reference
 * indices to the current list-0 indices, by matching the packed
 * 4*frame_num + reference value of each co-located reference against the
 * current ref_list.  With mbafi set, field references (entries 16..) are
 * mapped with field parity folded in (rfield^field, (j-16)^field).
 * NOTE(review): several lines (poc adjustment branches, closing braces)
 * are elided from this extract.
 */
914 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
915 MpegEncContext * const s = &h->s;
916 Picture * const ref1 = &h->ref_list[1][0];
917 int j, old_ref, rfield;
918 int start= mbafi ? 16 : 0;
919 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
920 int interl= mbafi || s->picture_structure != PICT_FRAME;
922 /* bogus; fills in for missing frames */
923 memset(map[list], 0, sizeof(map[list]));
925 for(rfield=0; rfield<2; rfield++){
926 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
927 int poc = ref1->ref_poc[colfield][list][old_ref];
931 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
932 poc= (poc&~3) + rfield + 1;
934 for(j=start; j<end; j++){
935 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
936 int cur_ref= mbafi ? (j-16)^field : j;
937 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
939 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and packed
 * poc keys) into the Picture struct so that future B slices can use
 * it as a co-located picture, then — for B slices with temporal
 * direct prediction — builds the col-to-list0 maps via fill_colmap().
 * NOTE(review): elided listing — loop variable declarations, the
 * early return after line 968, and several braces are not visible.
 */
947 static inline void direct_ref_list_init(H264Context * const h){
948 MpegEncContext * const s = &h->s;
949 Picture * const ref1 = &h->ref_list[1][0];
950 Picture * const cur = s->current_picture_ptr;
/* sidx selects the field slot for the current picture structure;
 * ref1sidx likewise for the co-located picture */
952 int sidx= (s->picture_structure&1)^1;
953 int ref1sidx= (ref1->reference&1)^1;
955 for(list=0; list<2; list++){
956 cur->ref_count[sidx][list] = h->ref_count[list];
957 for(j=0; j<h->ref_count[list]; j++)
/* packed key: 4*frame_num + (reference&3), same key fill_colmap matches on */
958 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* frame pictures duplicate the data into both field slots */
961 if(s->picture_structure == PICT_FRAME){
962 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
963 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
966 cur->mbaff= FRAME_MBAFF;
/* only temporal direct B slices need the colmaps */
968 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
971 for(list=0; list<2; list++){
972 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
973 for(field=0; field<2; field++)
974 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-slice
 * direct-predicted macroblock (or its direct 8x8 partitions when
 * is_b8x8), filling h->mv_cache / h->ref_cache and updating *mb_type
 * and h->sub_mb_type[]. Two modes:
 *  - spatial direct  (h->direct_spatial_mv_pred): refs are the minimum
 *    of the neighbours, MVs come from pred_motion(), with zeroing of
 *    partitions whose co-located block is a near-zero-MV list0 block;
 *  - temporal direct: MVs of the co-located list1[0] block are scaled
 *    by dist_scale_factor, refs mapped through map_col_to_list0.
 * The first section resolves field/frame (PAFF/MBAFF) mismatches
 * between the current MB and the co-located picture, adjusting mb_xy,
 * strides and mb_type_col accordingly.
 * NOTE(review): this listing is heavily elided — many declarations,
 * branch bodies and closing braces are missing; comments below only
 * describe what the visible lines establish.
 */
978 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
979 MpegEncContext * const s = &h->s;
980 int b8_stride = h->b8_stride;
981 int b4_stride = h->b_stride;
982 int mb_xy = h->mb_xy;
984 const int16_t (*l1mv0)[2], (*l1mv1)[2];
985 const int8_t *l1ref0, *l1ref1;
986 const int is_b8x8 = IS_8X8(*mb_type);
987 unsigned int sub_mb_type;
990 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- resolve frame/field geometry between current MB and co-located picture --- */
992 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
993 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
994 int cur_poc = s->current_picture_ptr->poc;
995 int *col_poc = h->ref_list[1]->field_poc;
/* pick the co-located field closest in POC to the current picture */
996 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
997 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
999 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1000 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1001 mb_xy += s->mb_stride*fieldoff;
1004 }else{ // AFL/AFR/FR/FL -> AFR/FR
1005 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1006 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1007 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1008 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1011 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1012 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1013 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1015 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1016 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1021 }else{ // AFR/FR -> AFR/FR
1024 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
/* choose partitioning based on the co-located MB's type */
1025 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1026 /* FIXME save sub mb types from previous frames (or derive from MVs)
1027 * so we know exactly what block size to use */
1028 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1029 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1030 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1031 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1032 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the co-located picture's motion vectors / ref indices */
1040 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1041 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1042 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1043 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1046 l1ref0 += h->b8_stride;
1047 l1ref1 += h->b8_stride;
1048 l1mv0 += 2*b4_stride;
1049 l1mv1 += 2*b4_stride;
/* --- spatial direct prediction --- */
1053 if(h->direct_spatial_mv_pred){
1058 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1060 /* ref = min(neighbors) */
1061 for(list=0; list<2; list++){
1062 int refa = h->ref_cache[list][scan8[0] - 1];
1063 int refb = h->ref_cache[list][scan8[0] - 8];
1064 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1065 if(refc == PART_NOT_AVAILABLE)
1066 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min: PART_NOT_AVAILABLE (negative) compares high */
1067 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1072 if(ref[0] < 0 && ref[1] < 0){
1073 ref[0] = ref[1] = 0;
1074 mv[0][0] = mv[0][1] =
1075 mv[1][0] = mv[1][1] = 0;
1077 for(list=0; list<2; list++){
1079 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1081 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the (sub-)mb type */
1087 *mb_type &= ~MB_TYPE_L1;
1088 sub_mb_type &= ~MB_TYPE_L1;
1089 }else if(ref[0] < 0){
1091 *mb_type &= ~MB_TYPE_L0;
1092 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch: walk 8x8 blocks with remapped col coordinates */
1095 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1096 for(i8=0; i8<4; i8++){
1099 int xy8 = x8+y8*b8_stride;
1100 int xy4 = 3*x8+y8*b4_stride;
1103 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1105 h->sub_mb_type[i8] = sub_mb_type;
1107 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1108 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero the partition when the co-located block is list0 ref0 with |mv|<=1 */
1109 if(!IS_INTRA(mb_type_col[y8])
1110 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1111 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1113 a= pack16to32(mv[0][0],mv[0][1]);
1115 b= pack16to32(mv[1][0],mv[1][1]);
1117 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1123 }else if(IS_16X16(*mb_type)){
1126 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1128 if(!IS_INTRA(mb_type_col[0])
1129 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1130 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 had a different (buggy) list1 check; work around */
1131 && (h->x264_build>33 || !h->x264_build)))){
1133 a= pack16to32(mv[0][0],mv[0][1]);
1135 b= pack16to32(mv[1][0],mv[1][1]);
1137 a= pack16to32(mv[0][0],mv[0][1]);
1138 b= pack16to32(mv[1][0],mv[1][1]);
1140 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1141 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1143 for(i8=0; i8<4; i8++){
1144 const int x8 = i8&1;
1145 const int y8 = i8>>1;
1147 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1149 h->sub_mb_type[i8] = sub_mb_type;
1151 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1152 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1153 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1154 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1157 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1158 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1159 && (h->x264_build>33 || !h->x264_build)))){
1160 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1161 if(IS_SUB_8X8(sub_mb_type)){
1162 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1163 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1165 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1167 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1170 for(i4=0; i4<4; i4++){
1171 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1172 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1174 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1176 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1182 }else{ /* direct temporal mv pred */
1183 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1184 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the per-field maps/scale factors */
1187 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1188 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1189 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1190 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1192 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1195 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1196 /* FIXME assumes direct_8x8_inference == 1 */
1197 int y_shift = 2*!IS_INTERLACED(*mb_type);
1199 for(i8=0; i8<4; i8++){
1200 const int x8 = i8&1;
1201 const int y8 = i8>>1;
1203 const int16_t (*l1mv)[2]= l1mv0;
1205 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1207 h->sub_mb_type[i8] = sub_mb_type;
/* temporal direct: list1 ref is always 0 */
1209 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1210 if(IS_INTRA(mb_type_col[y8])){
1211 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1212 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1213 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 ref0 = l1ref0[x8 + y8*b8_stride];
1219 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1221 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1224 scale = dist_scale_factor[ref0];
1225 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1228 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* y_shift compensates the field/frame vertical scale difference */
1229 int my_col = (mv_col[1]<<y_shift)/2;
1230 int mx = (scale * mv_col[0] + 128) >> 8;
1231 int my = (scale * my_col + 128) >> 8;
1232 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1233 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1239 /* one-to-one mv scaling */
1241 if(IS_16X16(*mb_type)){
1244 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1245 if(IS_INTRA(mb_type_col[0])){
1248 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1249 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1250 const int scale = dist_scale_factor[ref0];
1251 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* MVl0 = scale*mvCol; MVl1 = MVl0 - mvCol (spec 8.4.1.2.3) */
1253 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1254 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1256 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1257 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1259 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1260 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1261 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1263 for(i8=0; i8<4; i8++){
1264 const int x8 = i8&1;
1265 const int y8 = i8>>1;
1267 const int16_t (*l1mv)[2]= l1mv0;
1269 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1271 h->sub_mb_type[i8] = sub_mb_type;
1272 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1273 if(IS_INTRA(mb_type_col[0])){
1274 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1275 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1276 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1282 ref0 = map_col_to_list0[0][ref0];
1284 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1287 scale = dist_scale_factor[ref0];
1289 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1290 if(IS_SUB_8X8(sub_mb_type)){
1291 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1292 int mx = (scale * mv_col[0] + 128) >> 8;
1293 int my = (scale * mv_col[1] + 128) >> 8;
1294 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1295 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1297 for(i4=0; i4<4; i4++){
1298 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1299 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1300 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1301 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1302 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1303 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode caches
 * (h->mv_cache / h->mvd_cache / h->ref_cache) back into the frame-wide
 * arrays of the current picture (motion_val, ref_index, mvd_table,
 * direct_table). Inverse of the cache-fill done before decoding a MB.
 * NOTE(review): elided listing — loop headers for the per-row copies
 * and some braces are not visible here.
 */
1310 static inline void write_back_motion(H264Context *h, int mb_type){
1311 MpegEncContext * const s = &h->s;
/* b_xy / b8_xy: top-left 4x4 and 8x8 block coordinates of this MB */
1312 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1313 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1316 if(!USES_LIST(mb_type, 0))
1317 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1319 for(list=0; list<h->list_count; list++){
1321 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two uint64 halves) per row from the cache */
1325 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1326 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1328 if( h->pps.cabac ) {
1329 if(IS_SKIP(mb_type))
1330 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1333 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1339 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1340 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1341 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1342 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1343 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices store per-8x8 direct flags for context modelling */
1347 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1348 if(IS_8X8(mb_type)){
1349 uint8_t *direct_table = &h->direct_table[b8_xy];
1350 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1351 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1352 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1358 * Decodes a network abstraction layer unit.
1359 * @param consumed is the number of bytes used as input
1360 * @param length is the length of the array
1361 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1362 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the NAL header (ref_idc, unit type) and removes the
 * 00 00 03 emulation-prevention escapes from the RBSP, writing the
 * unescaped payload into h->rbsp_buffer. When no escapes are present
 * the input is returned directly without copying.
 * NOTE(review): elided listing — the fast scan loop body and the
 * escape-copy loop framing are only partially visible here.
 */
1364 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369 // src[0]&0x80; //forbidden bit
1370 h->nal_ref_idc= src[0]>>5;
1371 h->nal_unit_type= src[0]&0x1F;
1375 for(i=0; i<length; i++)
1376 printf("%2X ", src[i]);
/* fast scan: find the first 00 00 (01|02|03) sequence, if any */
1378 for(i=0; i+1<length; i+=2){
1379 if(src[i]) continue;
1380 if(i>0 && src[i-1]==0) i--;
1381 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1383 /* startcode, so we must be past the end */
1390 if(i>=length-1){ //no escaped 0
1391 *dst_length= length;
1392 *consumed= length+1; //+1 for the header
/* data partition C uses a second buffer so partitions A/B stay valid */
1396 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1397 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1398 dst= h->rbsp_buffer[bufidx];
1404 //printf("decoding esc\n");
1407 //remove escapes (very rare 1:2^22)
1408 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1409 if(src[si+2]==3){ //escape
1414 }else //next start code
1418 dst[di++]= src[si++];
1421 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1424 *consumed= si + 1;//+1 for the header
1425 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1430 * identifies the exact end of the bitstream
1431 * @return the length of the trailing, or 0 if damaged
/* NOTE(review): the body of this function is almost entirely elided
 * in this listing; only the trace line is visible. */
1433 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1437 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1447 * IDCT transforms the 16 dc values and dequantizes them.
1448 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform over the 16 luma DC coefficients,
 * done as a vertical pass into temp[] followed by a horizontal pass
 * that dequantizes (*qmul, rounded, >>8) and writes back at the DC
 * positions of the 16 4x4 blocks (hence the strided x/y offsets).
 * NOTE(review): elided listing — the temp[] stores of the first pass
 * and the loop headers are not visible here. */
1450 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1453 int temp[16]; //FIXME check if this is a good idea
1454 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1455 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1457 //memset(block, 64, 2*256);
/* vertical butterflies */
1460 const int offset= y_offset[i];
1461 const int z0= block[offset+stride*0] + block[offset+stride*4];
1462 const int z1= block[offset+stride*0] - block[offset+stride*4];
1463 const int z2= block[offset+stride*1] - block[offset+stride*5];
1464 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterflies + dequant */
1473 const int offset= x_offset[i];
1474 const int z0= temp[4*0+i] + temp[4*2+i];
1475 const int z1= temp[4*0+i] - temp[4*2+i];
1476 const int z2= temp[4*1+i] - temp[4*3+i];
1477 const int z3= temp[4*1+i] + temp[4*3+i];
1479 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1480 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1481 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1482 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1488 * DCT transforms the 16 dc values.
1489 * @param qp quantization parameter ??? FIXME
/* Forward counterpart of h264_luma_dc_dequant_idct_c (encoder side):
 * same two-pass Hadamard structure, output halved (>>1) instead of
 * dequantized. NOTE(review): elided listing — temp[] stores and loop
 * headers are not visible here. */
1491 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1492 // const int qmul= dequant_coeff[qp][0];
1494 int temp[16]; //FIXME check if this is a good idea
1495 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1496 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* vertical butterflies */
1499 const int offset= y_offset[i];
1500 const int z0= block[offset+stride*0] + block[offset+stride*4];
1501 const int z1= block[offset+stride*0] - block[offset+stride*4];
1502 const int z2= block[offset+stride*1] - block[offset+stride*5];
1503 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* horizontal butterflies */
1512 const int offset= x_offset[i];
1513 const int z0= temp[4*0+i] + temp[4*2+i];
1514 const int z1= temp[4*0+i] - temp[4*2+i];
1515 const int z2= temp[4*1+i] - temp[4*3+i];
1516 const int z3= temp[4*1+i] + temp[4*3+i];
1518 block[stride*0 +offset]= (z0 + z3)>>1;
1519 block[stride*2 +offset]= (z1 + z2)>>1;
1520 block[stride*8 +offset]= (z1 - z2)>>1;
1521 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 Hadamard inverse transform + dequantization of the chroma DC
 * coefficients, in place at the DC positions of the 4 chroma 4x4
 * blocks. NOTE(review): elided listing — the a..e declarations and
 * the intermediate butterfly (the line computing e) are not visible. */
1529 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1530 const int stride= 16*2;
1531 const int xStride= 16;
1534 a= block[stride*0 + xStride*0];
1535 b= block[stride*0 + xStride*1];
1536 c= block[stride*1 + xStride*0];
1537 d= block[stride*1 + xStride*1];
1544 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1545 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1546 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1547 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC coefficients
 * (encoder side); same layout as chroma_dc_dequant_idct_c, no
 * dequant/scale on output. NOTE(review): elided listing — the a..e
 * declarations and intermediate butterfly lines are not visible. */
1551 static void chroma_dc_dct_c(DCTELEM *block){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1566 block[stride*0 + xStride*0]= (a+c);
1567 block[stride*0 + xStride*1]= (e+b);
1568 block[stride*1 + xStride*0]= (a-c);
1569 block[stride*1 + xStride*1]= (e-b);
1574 * gets the chroma qp.
/* Simple table lookup: chroma QP for component t (Cb/Cr have separate
 * offsets in the PPS) given the luma qscale. */
1576 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1577 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition in one direction (one reference
 * picture): computes the quarter-pel luma / eighth-pel chroma source
 * position from mv_cache, falls back to ff_emulated_edge_mc() when the
 * prediction area extends past the picture edge, then applies the
 * given qpel and chroma MC functions to luma and both chroma planes.
 * NOTE(review): elided listing — the emu flag setup and several
 * braces are not visible here.
 */
1580 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1581 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1582 int src_x_offset, int src_y_offset,
1583 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1584 MpegEncContext * const s = &h->s;
/* mx/my are in quarter-pel units, offsets pre-scaled by 8 */
1585 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1586 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1587 const int luma_xy= (mx&3) + ((my&3)<<2);
1588 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1589 uint8_t * src_cb, * src_cr;
1590 int extra_width= h->emu_edge_width;
1591 int extra_height= h->emu_edge_height;
1593 const int full_mx= mx>>2;
1594 const int full_my= my>>2;
1595 const int pic_width = 16*s->mb_width;
1596 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel interpolation needs a 3-pixel margin on each side */
1598 if(mx&7) extra_width -= 3;
1599 if(my&7) extra_height -= 3;
1601 if( full_mx < 0-extra_width
1602 || full_my < 0-extra_height
1603 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1604 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1605 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1606 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1610 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1612 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1615 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1618 // chroma offset when predicting from a field of opposite parity
1619 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1620 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1622 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1623 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1626 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1627 src_cb= s->edge_emu_buffer;
1629 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1632 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1633 src_cr= s->edge_emu_buffer;
1635 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (unweighted) motion compensation for one partition:
 * predicts from list0 with the "put" functions, then, if the
 * partition is bidirectional, averages in the list1 prediction with
 * the "avg" functions. NOTE(review): elided listing — the switch from
 * put to avg ops and some braces are not fully visible.
 */
1638 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1639 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1640 int x_offset, int y_offset,
1641 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1642 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1643 int list0, int list1){
1644 MpegEncContext * const s = &h->s;
1645 qpel_mc_func *qpix_op= qpix_put;
1646 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition; offsets in 8-pel units */
1648 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1649 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1650 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1651 x_offset += 8*s->mb_x;
1652 y_offset += 8*(s->mb_y >> MB_FIELD);
1655 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1656 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1657 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1658 qpix_op, chroma_op);
/* second direction averages into the first */
1661 chroma_op= chroma_avg;
1665 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1666 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1667 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1668 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-predicted blocks render both directions (list1 into a scratch
 * buffer) and blend with either implicit weights (use_weight==2) or
 * explicit per-reference luma/chroma weights and offsets; single
 * direction blocks apply the explicit weight in place.
 * NOTE(review): elided listing — some conditional framing and braces
 * are not visible here.
 */
1672 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1673 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1674 int x_offset, int y_offset,
1675 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1676 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1677 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1678 int list0, int list1){
1679 MpegEncContext * const s = &h->s;
1681 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1682 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1683 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1684 x_offset += 8*s->mb_x;
1685 y_offset += 8*(s->mb_y >> MB_FIELD);
1688 /* don't optimize for luma-only case, since B-frames usually
1689 * use implicit weights => chroma too. */
1690 uint8_t *tmp_cb = s->obmc_scratchpad;
1691 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1692 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1693 int refn0 = h->ref_cache[0][ scan8[n] ];
1694 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 directly to dest, list1 to scratch; blended below */
1696 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1697 dest_y, dest_cb, dest_cr,
1698 x_offset, y_offset, qpix_put, chroma_put);
1699 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1700 tmp_y, tmp_cb, tmp_cr,
1701 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64, no offsets */
1703 if(h->use_weight == 2){
1704 int weight0 = h->implicit_weight[refn0][refn1];
1705 int weight1 = 64 - weight0;
1706 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1707 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1708 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1710 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1711 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1712 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1713 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1714 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1715 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1716 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1717 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1718 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict, then weight in place */
1721 int list = list1 ? 1 : 0;
1722 int refn = h->ref_cache[list][ scan8[n] ];
1723 Picture *ref= &h->ref_list[list][refn];
1724 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1725 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1726 qpix_put, chroma_put);
1728 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1729 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1730 if(h->use_weight_chroma){
1731 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1732 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1733 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1734 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatcher for one partition's motion compensation: routes to
 * mc_part_weighted() when explicit weighting is on, or when implicit
 * weighting is on and the bi-prediction weights are not the trivial
 * 32/32 average; otherwise uses the standard path mc_part_std().
 */
1739 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1740 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1741 int x_offset, int y_offset,
1742 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1743 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1744 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1745 int list0, int list1){
1746 if((h->use_weight==2 && list0 && list1
1747 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1748 || h->use_weight==1)
1749 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1750 x_offset, y_offset, qpix_put, chroma_put,
1751 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1753 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1754 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Issues cache prefetches into the reference picture at the position
 * the 16x16 MV of this MB points to, a few MBs ahead of use.
 * NOTE(review): elided listing — the guard around the refn check is
 * not visible here. */
1757 static inline void prefetch_motion(H264Context *h, int list){
1758 /* fetch pixels for estimated mv 4 macroblocks ahead
1759 * optimized for 64byte cache lines */
1760 MpegEncContext * const s = &h->s;
1761 const int refn = h->ref_cache[list][scan8[0]];
1763 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1764 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1765 uint8_t **src= h->ref_list[list][refn].data;
1766 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1767 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* one prefetch covers both chroma planes via their pointer distance */
1768 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1769 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs full inter prediction for one macroblock: dispatches
 * mc_part() per partition according to the MB partitioning
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions down to 4x4),
 * selecting the matching qpel/chroma function and weight table entry
 * for each partition size. Also prefetches list0/list1 reference data.
 * NOTE(review): elided listing — the 8x8 loop header and the n
 * computation (visible partitions use n = 4*i) are not shown.
 */
1773 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1774 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1775 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1776 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1777 MpegEncContext * const s = &h->s;
1778 const int mb_xy= h->mb_xy;
1779 const int mb_type= s->current_picture.mb_type[mb_xy];
1781 assert(IS_INTER(mb_type));
1783 prefetch_motion(h, 0);
1785 if(IS_16X16(mb_type)){
1786 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1787 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1788 &weight_op[0], &weight_avg[0],
1789 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1790 }else if(IS_16X8(mb_type)){
1791 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1792 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1793 &weight_op[1], &weight_avg[1],
1794 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1795 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1796 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1797 &weight_op[1], &weight_avg[1],
1798 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1799 }else if(IS_8X16(mb_type)){
1800 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1801 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1802 &weight_op[2], &weight_avg[2],
1803 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1804 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1805 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1806 &weight_op[2], &weight_avg[2],
1807 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitioning: handle each sub-macroblock on its own */
1811 assert(IS_8X8(mb_type));
1814 const int sub_mb_type= h->sub_mb_type[i];
1816 int x_offset= (i&1)<<2;
1817 int y_offset= (i&2)<<1;
1819 if(IS_SUB_8X8(sub_mb_type)){
1820 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1821 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1822 &weight_op[3], &weight_avg[3],
1823 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1824 }else if(IS_SUB_8X4(sub_mb_type)){
1825 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1826 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1827 &weight_op[4], &weight_avg[4],
1828 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1829 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1830 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1831 &weight_op[4], &weight_avg[4],
1832 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1833 }else if(IS_SUB_4X8(sub_mb_type)){
1834 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1835 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1836 &weight_op[5], &weight_avg[5],
1837 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1838 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1839 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1840 &weight_op[5], &weight_avg[5],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1844 assert(IS_SUB_4X4(sub_mb_type));
1846 int sub_x_offset= x_offset + 2*(j&1);
1847 int sub_y_offset= y_offset + (j&2);
1848 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1849 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1850 &weight_op[6], &weight_avg[6],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token,
 * total_zeros, run_before, and their chroma-DC variants) into the
 * statically allocated table storage declared at file scope.
 * Guarded by the 'done' flag so repeated decoder opens are cheap.
 * NOTE(review): elided listing — the done-flag check, loop headers
 * and the offset declaration are not visible here.
 */
1860 static av_cold void decode_init_vlc(void){
1861 static int done = 0;
1868 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1869 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1870 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1871 &chroma_dc_coeff_token_len [0], 1, 1,
1872 &chroma_dc_coeff_token_bits[0], 1, 1,
1873 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables share one packed static array */
1877 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1878 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1879 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1880 &coeff_token_len [i][0], 1, 1,
1881 &coeff_token_bits[i][0], 1, 1,
1882 INIT_VLC_USE_NEW_STATIC);
1883 offset += coeff_token_vlc_tables_size[i];
1886 * This is a one time safety check to make sure that
1887 * the packed static coeff_token_vlc table sizes
1888 * were initialized correctly.
1890 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1893 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1894 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1895 init_vlc(&chroma_dc_total_zeros_vlc[i],
1896 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1897 &chroma_dc_total_zeros_len [i][0], 1, 1,
1898 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1899 INIT_VLC_USE_NEW_STATIC);
1901 for(i=0; i<15; i++){
1902 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1903 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1904 init_vlc(&total_zeros_vlc[i],
1905 TOTAL_ZEROS_VLC_BITS, 16,
1906 &total_zeros_len [i][0], 1, 1,
1907 &total_zeros_bits[i][0], 1, 1,
1908 INIT_VLC_USE_NEW_STATIC);
1912 run_vlc[i].table = run_vlc_tables[i];
1913 run_vlc[i].table_allocated = run_vlc_tables_size;
1914 init_vlc(&run_vlc[i],
1916 &run_len [i][0], 1, 1,
1917 &run_bits[i][0], 1, 1,
1918 INIT_VLC_USE_NEW_STATIC);
/* run_before for zeros_left > 6 uses its own wider table */
1920 run7_vlc.table = run7_vlc_table,
1921 run7_vlc.table_allocated = run7_vlc_table_size;
1922 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1923 &run_len [6][0], 1, 1,
1924 &run_bits[6][0], 1, 1,
1925 INIT_VLC_USE_NEW_STATIC);
1929 static void free_tables(H264Context *h){
/* Release all per-decoder tables allocated by alloc_tables()/context_init().
 * av_freep() NULLs each pointer, so a subsequent alloc_tables() can run
 * cleanly; slice_table is only an offset view into slice_table_base and is
 * therefore reset to NULL by hand rather than freed. */
1932 av_freep(&h->intra4x4_pred_mode);
1933 av_freep(&h->chroma_pred_mode_table);
1934 av_freep(&h->cbp_table);
1935 av_freep(&h->mvd_table[0]);
1936 av_freep(&h->mvd_table[1]);
1937 av_freep(&h->direct_table);
1938 av_freep(&h->non_zero_count);
1939 av_freep(&h->slice_table_base);
1940 h->slice_table= NULL;
1942 av_freep(&h->mb2b_xy);
1943 av_freep(&h->mb2b8_xy);
/* Per-thread contexts own their own top_borders and scratchpad
 * (allocated in context_init()/frame_start()); free them for every
 * thread context, not just the main one. */
1945 for(i = 0; i < h->s.avctx->thread_count; i++) {
1946 hx = h->thread_context[i];
1948 av_freep(&hx->top_borders[1]);
1949 av_freep(&hx->top_borders[0]);
1950 av_freep(&hx->s.obmc_scratchpad);
1954 static void init_dequant8_coeff_table(H264Context *h){
/* Precompute 8x8 dequantization coefficients for all 52 QP values,
 * combining the PPS 8x8 scaling matrices with the standard dequant
 * constants. If the IDCT implementation is not the C reference one,
 * the table is stored transposed to match that IDCT's input layout. */
1956 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1957 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1958 h->dequant8_coeff[1] = h->dequant8_buffer[1];
/* i=0: intra matrix, i=1: inter matrix; if both scaling matrices are
 * identical, alias the second table onto the first instead of
 * recomputing it. */
1960 for(i=0; i<2; i++ ){
1961 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1962 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1966 for(q=0; q<52; q++){
1967 int shift = div6[q]; /* QP/6 gives the left-shift of the dequant step */
/* (x>>3)|((x&7)<<3) swaps row/column within the 8x8 block when transposing */
1970 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1971 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1972 h->pps.scaling_matrix8[i][x]) << shift;
1977 static void init_dequant4_coeff_table(H264Context *h){
/* Precompute 4x4 dequantization coefficients for all 52 QP values and
 * all six scaling-matrix slots, combining the PPS 4x4 scaling matrices
 * with the standard dequant constants. Stored transposed when the IDCT
 * in use is not the C reference implementation. */
1979 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1980 for(i=0; i<6; i++ ){
1981 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* If matrix i duplicates an earlier matrix j, alias its table and skip
 * the recomputation. */
1983 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1984 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1991 for(q=0; q<52; q++){
1992 int shift = div6[q] + 2; /* QP/6 plus the fixed 4x4 scale of 4 */
/* (x>>2)|((x<<2)&0xF) swaps row/column within the 4x4 block when transposing */
1995 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
1996 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
1997 h->pps.scaling_matrix4[i][x]) << shift;
2002 static void init_dequant_tables(H264Context *h){
/* Build all dequant tables needed for the active PPS: always the 4x4
 * tables, the 8x8 tables only when the PPS enables 8x8 transforms.
 * With lossless transform bypass, QP 0 must dequantize to identity,
 * so its entries are forced to the neutral value 1<<6 (= 64). */
2004 init_dequant4_coeff_table(h);
2005 if(h->pps.transform_8x8_mode)
2006 init_dequant8_coeff_table(h);
2007 if(h->sps.transform_bypass){
/* neutral scale: 64 corresponds to a multiplier of 1.0 */
2010 h->dequant4_coeff[i][0][x] = 1<<6;
2011 if(h->pps.transform_8x8_mode)
2014 h->dequant8_coeff[i][0][x] = 1<<6;
2021 * needs width/height
2023 static int alloc_tables(H264Context *h){
/* Allocate all per-stream decoding tables; requires mb_width/mb_height
 * (i.e. the picture dimensions) to be known. Returns 0 on success;
 * CHECKED_ALLOCZ presumably jumps to a fail label that returns an
 * error after cleanup — the label is defined outside this excerpt. */
2024 MpegEncContext * const s = &h->s;
/* one extra macroblock row so that neighbor lookups above row 0 /
 * below the last row stay inside the allocation */
2025 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2028 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2030 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2031 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2032 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2034 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2035 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2036 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2037 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* slice_table entries of -1 mean "no slice here"; slice_table points
 * past the guard rows so index -1/top-neighbor accesses are valid */
2039 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2040 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2042 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2043 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map macroblock index to 4x4-block (b) and 8x8-block (b8) indices */
2044 for(y=0; y<s->mb_height; y++){
2045 for(x=0; x<s->mb_width; x++){
2046 const int mb_xy= x + y*s->mb_stride;
2047 const int b_xy = 4*x + 4*y*h->b_stride;
2048 const int b8_xy= 2*x + 2*y*h->b8_stride;
2050 h->mb2b_xy [mb_xy]= b_xy;
2051 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is sized from linesize, which is unknown here; it is
 * allocated later in frame_start() */
2055 s->obmc_scratchpad = NULL;
2057 if(!h->dequant4_coeff[0])
2058 init_dequant_tables(h);
2067 * Mimic alloc_tables(), but for every context thread.
2069 static void clone_tables(H264Context *dst, H264Context *src){
/* Share the big per-stream tables from src with a per-thread context:
 * these are read/written per-macroblock at disjoint positions, so all
 * slice threads can point at the same storage. Only the scratchpad is
 * per-thread (reset here; allocated lazily in frame_start()). */
2070 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2071 dst->non_zero_count = src->non_zero_count;
2072 dst->slice_table = src->slice_table;
2073 dst->cbp_table = src->cbp_table;
2074 dst->mb2b_xy = src->mb2b_xy;
2075 dst->mb2b8_xy = src->mb2b8_xy;
2076 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2077 dst->mvd_table[0] = src->mvd_table[0];
2078 dst->mvd_table[1] = src->mvd_table[1];
2079 dst->direct_table = src->direct_table;
2081 dst->s.obmc_scratchpad = NULL;
/* prediction function pointers depend only on codec id, so each thread
 * context initializes its own copy */
2082 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2087 * Allocate buffers which are not shared amongst multiple threads.
2089 static int context_init(H264Context *h){
/* Allocate the buffers that must be private to each slice thread:
 * two top-border rows of 16 luma + 8+8 chroma bytes per macroblock.
 * Returns 0 on success, -1 on allocation failure (CHECKED_ALLOCZ
 * jumps to the fail path). */
2090 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2091 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2095 return -1; // free_tables will clean up for us
2098 static av_cold void common_init(H264Context *h){
/* Initialization shared by all entry points (H.264 and SVQ3): copy
 * dimensions/codec id from the AVCodecContext, set up intra prediction
 * and DSP function tables, and default both scaling matrices to flat
 * (all 16), i.e. no custom quantization weighting until a PPS/SPS
 * overrides them. */
2099 MpegEncContext * const s = &h->s;
2101 s->width = s->avctx->width;
2102 s->height = s->avctx->height;
2103 s->codec_id= s->avctx->codec->id;
2105 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 marks "no PPS seen yet" so the first slice forces a dequant
 * table rebuild */
2107 h->dequant_coeff_pps= -1;
2108 s->unrestricted_mv=1;
2109 s->decode=1; //FIXME
2111 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2113 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2114 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2117 static av_cold int decode_init(AVCodecContext *avctx){
/* AVCodec.init callback: set MpegEncContext defaults, pick the output
 * pixel format (full-range YUVJ420P for SVQ3, YUV420P otherwise),
 * detect AVC-style ("avcC", length-prefixed NAL) extradata, and prime
 * the POC bookkeeping used by frame reordering. */
2118 H264Context *h= avctx->priv_data;
2119 MpegEncContext * const s = &h->s;
2121 MPV_decode_defaults(s);
2126 s->out_format = FMT_H264;
2127 s->workaround_bugs= avctx->workaround_bugs;
2130 // s->decode_mb= ff_h263_decode_mb;
2131 s->quarter_sample = 1;
2134 if(avctx->codec_id == CODEC_ID_SVQ3)
2135 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2137 avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata starts with configurationVersion == 1; Annex-B
 * extradata starts with a 00 00 01 start code instead */
2141 if(avctx->extradata_size > 0 && avctx->extradata &&
2142 *(char *)avctx->extradata == 1){
/* thread_context[0] is the main context itself; the others are
 * created when threading is configured */
2149 h->thread_context[0] = h;
2150 h->outputed_poc = INT_MIN;
2151 h->prev_poc_msb= 1<<16;
2155 static int frame_start(H264Context *h){
/* Per-frame setup: acquire a new picture from MPV, start error
 * resilience, precompute the per-block destination offsets for both
 * frame (block_offset[0..23]) and field (block_offset[24..47]) access,
 * and lazily allocate the per-thread scratchpad now that linesize is
 * known. Returns 0 on success, <0 if MPV_frame_start fails. */
2156 MpegEncContext * const s = &h->s;
2159 if(MPV_frame_start(s, s->avctx) < 0)
2161 ff_er_frame_start(s);
2163 * MPV_frame_start uses pict_type to derive key_frame.
2164 * This is incorrect for H.264; IDR markings must be used.
2165 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2166 * See decode_nal_units().
2168 s->current_picture_ptr->key_frame= 0;
2170 assert(s->linesize && s->uvlinesize);
/* luma offsets: scan8 maps block index to its cache position; the
 * second set (24+i) uses doubled row stride for field macroblocks */
2172 for(i=0; i<16; i++){
2173 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2174 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets, Cb and Cr sharing the same geometry */
2177 h->block_offset[16+i]=
2178 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2179 h->block_offset[24+16+i]=
2180 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2183 /* can't be in alloc_tables because linesize isn't known there.
2184 * FIXME: redo bipred weight to not require extra buffer? */
2185 for(i = 0; i < s->avctx->thread_count; i++)
2186 if(!h->thread_context[i]->s.obmc_scratchpad)
2187 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2189 /* some macroblocks will be accessed before they're available */
2190 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2191 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2193 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2195 // We mark the current picture as non-reference after allocating it, so
2196 // that if we break out due to an error it can be released automatically
2197 // in the next MPV_frame_start().
2198 // SVQ3 as well as most other codecs have only last/next/current and thus
2199 // get released even with set reference, besides SVQ3 and others do not
2200 // mark frames as reference later "naturally".
2201 if(s->codec_id != CODEC_ID_SVQ3)
2202 s->current_picture_ptr->reference= 0;
/* field POCs start at "unknown"; filled once slice headers are parsed */
2204 s->current_picture_ptr->field_poc[0]=
2205 s->current_picture_ptr->field_poc[1]= INT_MAX;
2206 assert(s->current_picture_ptr->long_ref==0);
2211 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
/* Save the bottom row and right column of the just-decoded macroblock
 * into top_borders[]/left_border[] so the deblocking filter and intra
 * prediction of neighboring macroblocks can read pre-filter samples.
 * MBAFF mode keeps two border sets (top/bottom of a MB pair) and uses
 * a row step of 2; the 'simple' fast path skips all MBAFF handling. */
2212 MpegEncContext * const s = &h->s;
2221 src_cb -= uvlinesize;
2222 src_cr -= uvlinesize;
2224 if(!simple && FRAME_MBAFF){
/* offset/uvoffset select which half of left_border belongs to this
 * field macroblock of the pair */
2226 offset = MB_MBAFF ? 1 : 17;
2227 uvoffset= MB_MBAFF ? 1 : 9;
2229 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2230 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2231 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2232 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2233 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2238 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2239 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2240 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2241 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2247 top_idx = MB_MBAFF ? 0 : 1;
2249 step= MB_MBAFF ? 2 : 1;
2252 // There are two lines saved, the line above the top macroblock of a pair,
2253 // and the line above the bottom macroblock
/* right-column backup for the left neighbor of the next macroblock */
2254 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2255 for(i=1; i<17 - skiplast; i++){
2256 h->left_border[offset+i*step]= src_y[15+i* linesize];
2259 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2260 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma borders are skipped entirely in gray-only decoding */
2262 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2263 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2264 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2265 for(i=1; i<9 - skiplast; i++){
2266 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2267 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2269 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2274 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
/* Swap (xchg=1) or restore (xchg=0) the saved pre-deblocking border
 * samples with the picture data around the current macroblock, so
 * intra prediction of an in-loop-filtered neighbor sees unfiltered
 * pixels. Deblocking across slice boundaries is suppressed when
 * deblocking_filter==2 by comparing slice ids of the neighbors. */
2275 MpegEncContext * const s = &h->s;
2286 if(!simple && FRAME_MBAFF){
2288 offset = MB_MBAFF ? 1 : 17;
2289 uvoffset= MB_MBAFF ? 1 : 9;
2293 top_idx = MB_MBAFF ? 0 : 1;
2295 step= MB_MBAFF ? 2 : 1;
2298 if(h->deblocking_filter == 2) {
/* filter only inside the current slice: neighbor must share slice id */
2300 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2301 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2303 deblock_left = (s->mb_x > 0);
2304 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one row/column so indices address the border pixels */
2307 src_y -= linesize + 1;
2308 src_cb -= uvlinesize + 1;
2309 src_cr -= uvlinesize + 1;
2311 #define XCHG(a,b,t,xchg)\
2318 for(i = !deblock_top; i<16; i++){
2319 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2321 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2325 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2326 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2327 if(s->mb_x+1 < s->mb_width){
2328 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2332 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2334 for(i = !deblock_top; i<8; i++){
2335 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2336 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2338 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2339 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2342 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2343 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2348 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
/* Reconstruct one macroblock: prediction (intra or motion
 * compensation), residual IDCT+add for luma and chroma, then
 * deblocking. 'simple' is a compile-time flag: with simple=1 the
 * compiler strips all interlace/PCM/gray/SVQ3 paths, yielding the
 * fast progressive-H.264 variant (see hl_decode_mb_simple/complex). */
2349 MpegEncContext * const s = &h->s;
2350 const int mb_x= s->mb_x;
2351 const int mb_y= s->mb_y;
2352 const int mb_xy= h->mb_xy;
2353 const int mb_type= s->current_picture.mb_type[mb_xy];
2354 uint8_t *dest_y, *dest_cb, *dest_cr;
2355 int linesize, uvlinesize /*dct_offset*/;
2357 int *block_offset = &h->block_offset[0];
/* lossless mode applies only when qscale==0 and the SPS enables it */
2358 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2359 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2360 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2361 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2363 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2364 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2365 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2367 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2368 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use the field block_offset set,
 * and for the bottom MB of a pair rewind dest to the field start */
2370 if (!simple && MB_FIELD) {
2371 linesize = h->mb_linesize = s->linesize * 2;
2372 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2373 block_offset = &h->block_offset[24];
2374 if(mb_y&1){ //FIXME move out of this function?
2375 dest_y -= s->linesize*15;
2376 dest_cb-= s->uvlinesize*7;
2377 dest_cr-= s->uvlinesize*7;
/* remap frame reference indices to field indices in the ref cache */
2381 for(list=0; list<h->list_count; list++){
2382 if(!USES_LIST(mb_type, list))
2384 if(IS_16X16(mb_type)){
2385 int8_t *ref = &h->ref_cache[list][scan8[0]];
2386 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2388 for(i=0; i<16; i+=4){
2389 int ref = h->ref_cache[list][scan8[i]];
2391 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2397 linesize = h->mb_linesize = s->linesize;
2398 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2399 // dct_offset = s->linesize * 16;
/* IPCM macroblock: raw samples were stored in h->mb, copy them out */
2402 if (!simple && IS_INTRA_PCM(mb_type)) {
2403 for (i=0; i<16; i++) {
2404 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2406 for (i=0; i<8; i++) {
2407 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2408 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2411 if(IS_INTRA(mb_type)){
/* give intra prediction unfiltered neighbor samples */
2412 if(h->deblocking_filter)
2413 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2415 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2416 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2417 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2420 if(IS_INTRA4x4(mb_type)){
2421 if(simple || !s->encoding){
/* 8x8 intra: predict and add residual per 8x8 sub-block */
2422 if(IS_8x8DCT(mb_type)){
2423 if(transform_bypass){
2425 idct_add = s->dsp.add_pixels8;
2427 idct_dc_add = s->dsp.h264_idct8_dc_add;
2428 idct_add = s->dsp.h264_idct8_add;
2430 for(i=0; i<16; i+=4){
2431 uint8_t * const ptr= dest_y + block_offset[i];
2432 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 = High 4:4:4, which defines prediction-with-add
 * for lossless vertical/horizontal modes */
2433 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2434 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2436 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2437 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2438 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set takes the cheap DC-only path */
2440 if(nnz == 1 && h->mb[i*16])
2441 idct_dc_add(ptr, h->mb + i*16, linesize);
2443 idct_add (ptr, h->mb + i*16, linesize);
/* 4x4 intra: predict and add residual per 4x4 block */
2448 if(transform_bypass){
2450 idct_add = s->dsp.add_pixels4;
2452 idct_dc_add = s->dsp.h264_idct_dc_add;
2453 idct_add = s->dsp.h264_idct_add;
2455 for(i=0; i<16; i++){
2456 uint8_t * const ptr= dest_y + block_offset[i];
2457 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2459 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2460 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* diagonal/vert-left modes need top-right samples; if missing,
 * replicate the last available top pixel */
2464 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2465 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2466 assert(mb_y || linesize <= block_offset[i]);
2467 if(!topright_avail){
2468 tr= ptr[3 - linesize]*0x01010101;
2469 topright= (uint8_t*) &tr;
2471 topright= ptr + 4 - linesize;
2475 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2476 nnz = h->non_zero_count_cache[ scan8[i] ];
2479 if(nnz == 1 && h->mb[i*16])
2480 idct_dc_add(ptr, h->mb + i*16, linesize);
2482 idct_add (ptr, h->mb + i*16, linesize);
2484 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: one full-MB prediction plus luma DC transform */
2491 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2493 if(!transform_bypass)
2494 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2496 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2498 if(h->deblocking_filter)
2499 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation for all partitions */
2501 hl_motion(h, dest_y, dest_cb, dest_cr,
2502 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2503 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2504 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-4x4-intra macroblocks */
2508 if(!IS_INTRA4x4(mb_type)){
2510 if(IS_INTRA16x16(mb_type)){
2511 if(transform_bypass){
2512 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2513 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2515 for(i=0; i<16; i++){
2516 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2517 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2521 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2523 }else if(h->cbp&15){
2524 if(transform_bypass){
2525 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2526 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2527 for(i=0; i<16; i+=di){
2528 if(h->non_zero_count_cache[ scan8[i] ]){
2529 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2533 if(IS_8x8DCT(mb_type)){
2534 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2536 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 path: its own IDCT with qscale baked in */
2541 for(i=0; i<16; i++){
2542 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2543 uint8_t * const ptr= dest_y + block_offset[i];
2544 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5 signal chroma coefficients) */
2550 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2551 uint8_t *dest[2] = {dest_cb, dest_cr};
2552 if(transform_bypass){
2553 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2554 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2555 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2557 idct_add = s->dsp.add_pixels4;
2558 for(i=16; i<16+8; i++){
2559 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2560 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses its own 2x2 transform; dequant slot 1/2 for intra,
 * 4/5 for inter, per chroma QP of each plane */
2564 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2565 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2567 idct_add = s->dsp.h264_idct_add;
2568 idct_dc_add = s->dsp.h264_idct_dc_add;
2569 for(i=16; i<16+8; i++){
2570 if(h->non_zero_count_cache[ scan8[i] ])
2571 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2572 else if(h->mb[i*16])
2573 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2576 for(i=16; i<16+8; i++){
2577 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2578 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2579 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2586 if(h->cbp || IS_INTRA(mb_type))
2587 s->dsp.clear_blocks(h->mb);
/* in-loop deblocking: back up borders first, refresh chroma QP, then
 * run the (MBAFF-aware or fast) filter */
2589 if(h->deblocking_filter) {
2590 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2591 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2592 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2593 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2594 if (!simple && FRAME_MBAFF) {
2595 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2597 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2603 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2605 static void hl_decode_mb_simple(H264Context *h){
/* Fast path: instantiate hl_decode_mb_internal with simple=1 so the
 * compiler removes interlace/PCM/gray/SVQ3 branches. */
2606 hl_decode_mb_internal(h, 1);
2610 * Process a macroblock; this handles edge cases, such as interlacing.
2612 static void av_noinline hl_decode_mb_complex(H264Context *h){
/* Full path: instantiate hl_decode_mb_internal with simple=0, keeping
 * all edge cases (interlacing, IPCM, gray, SVQ3). av_noinline keeps
 * this rarely-taken variant out of the hot caller. */
2613 hl_decode_mb_internal(h, 0);
2616 static void hl_decode_mb(H264Context *h){
/* Dispatch macroblock reconstruction to the simple or complex variant.
 * The complex path is taken for small builds (ENABLE_SMALL compiles
 * only one variant), interlaced/complex streams, IPCM macroblocks,
 * and lossless (qscale==0) mode. */
2617 MpegEncContext * const s = &h->s;
2618 const int mb_xy= h->mb_xy;
2619 const int mb_type= s->current_picture.mb_type[mb_xy];
2620 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* encoder builds may skip reconstruction entirely */
2622 if(ENABLE_H264_ENCODER && !s->decode)
2626 hl_decode_mb_complex(h);
2627 else hl_decode_mb_simple(h);
2630 static void pic_as_field(Picture *pic, const int parity){
/* Convert a frame Picture into a single-field view in place: for the
 * bottom field, offset each plane by one line; double every linesize
 * so rows step over the other field; restrict the reference marking
 * to the requested parity and pick that field's POC. */
2632 for (i = 0; i < 4; ++i) {
2633 if (parity == PICT_BOTTOM_FIELD)
2634 pic->data[i] += pic->linesize[i];
2635 pic->reference = parity;
2636 pic->linesize[i] *= 2;
2638 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2641 static int split_field_copy(Picture *dest, Picture *src,
2642 int parity, int id_add){
/* Copy src into dest if src is referenced with the requested parity;
 * for field parities, reinterpret the copy as that single field and
 * bump pic_id by id_add. Returns non-zero when a copy was made. */
2643 int match = !!(src->reference & parity);
2647 if(parity != PICT_FRAME){
2648 pic_as_field(dest, parity);
2650 dest->pic_id += id_add;
2657 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* Build a default reference list segment from candidate pictures 'in',
 * interleaving same-parity (sel) and opposite-parity (sel^3) fields as
 * the H.264 field-reference ordering requires. pic_id is the long-term
 * index for long refs, frame_num for short refs. Returns the number of
 * entries written into def[]. */
2661 while(i[0]<len || i[1]<len){
/* advance each cursor to the next candidate referenced with the
 * wanted parity */
2662 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2664 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2667 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2668 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2671 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2672 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2679 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
/* Selection-sort src pictures by POC into sorted[], taking only those
 * beyond 'limit' in the direction 'dir' (dir=0: POCs below limit in
 * descending order; dir=1: POCs above limit in ascending order).
 * Returns the number of pictures emitted. */
2684 best_poc= dir ? INT_MIN : INT_MAX;
2686 for(i=0; i<len; i++){
2687 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons to scan the other direction */
2688 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2690 sorted[out_i]= src[i];
2693 if(best_poc == (dir ? INT_MIN : INT_MAX))
2695 limit= sorted[out_i++]->poc - dir;
2701 * fills the default_ref_list.
2703 static int fill_default_ref_list(H264Context *h){
/* Construct the default reference picture lists per H.264 8.2.4.2.
 * B slices: list0 = short refs before the current POC (descending)
 * then after (ascending), list1 the mirror order, long refs appended;
 * if both lists come out identical, the first two entries of list1
 * are swapped. P slices: short refs by frame_num order then long refs.
 * Unfilled tail entries are zeroed. */
2704 MpegEncContext * const s = &h->s;
2707 if(h->slice_type_nos==FF_B_TYPE){
2708 Picture *sorted[32];
/* for field pictures, compare against the POC of the current field */
2713 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2715 cur_poc= s->current_picture_ptr->poc;
2717 for(list= 0; list<2; list++){
2718 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2719 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2721 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2722 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2725 if(len < h->ref_count[list])
2726 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 equals list0 and has more than one entry, swap its
 * first two entries */
2730 if(lens[0] == lens[1] && lens[1] > 1){
2731 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2733 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2736 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2737 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2739 if(len < h->ref_count[0])
2740 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* debug dump of the resulting lists (tprintf is a no-op unless
 * trace logging is compiled in) */
2743 for (i=0; i<h->ref_count[0]; i++) {
2744 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2746 if(h->slice_type_nos==FF_B_TYPE){
2747 for (i=0; i<h->ref_count[1]; i++) {
2748 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2755 static void print_short_term(H264Context *h);
2756 static void print_long_term(H264Context *h);
2759 * Extract structure information about the picture described by pic_num in
2760 * the current decoding context (frame or field). Note that pic_num is
2761 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2762 * @param pic_num picture number for which to extract structure information
2763 * @param structure one of PICT_XXX describing structure of picture
2765 * @return frame number (short term) or long term index of picture
2766 * described by pic_num
2768 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
/* Split a field pic_num into (frame number / long-term index,
 * structure): even/odd parity of pic_num selects the same or the
 * opposite field relative to the current picture structure. See the
 * doc comment above for the full contract. */
2769 MpegEncContext * const s = &h->s;
2771 *structure = s->picture_structure;
2774 /* opposite field */
2775 *structure ^= PICT_FRAME;
2782 static int decode_ref_pic_list_reordering(H264Context *h){
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command
 * (idc 0/1: short-term via abs_diff_pic_num, idc 2: long-term index,
 * idc 3: stop) find the named picture and move it to the current
 * index, shifting the rest down. Any slot left without a picture is
 * filled with the current picture as a last-resort error concealment.
 * Returns 0 on success, -1 on bitstream errors. */
2783 MpegEncContext * const s = &h->s;
2784 int list, index, pic_structure;
2786 print_short_term(h);
2789 for(list=0; list<h->list_count; list++){
2790 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2792 if(get_bits1(&s->gb)){
2793 int pred= h->curr_pic_num;
2795 for(index=0; ; index++){
2796 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2797 unsigned int pic_id;
2799 Picture *ref = NULL;
2801 if(reordering_of_pic_nums_idc==3)
2804 if(index >= h->ref_count[list]){
2805 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2809 if(reordering_of_pic_nums_idc<3){
2810 if(reordering_of_pic_nums_idc<2){
/* short-term: pic num is predicted, offset by +-abs_diff_pic_num
 * with wraparound at max_pic_num */
2811 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2814 if(abs_diff_pic_num > h->max_pic_num){
2815 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2819 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2820 else pred+= abs_diff_pic_num;
2821 pred &= h->max_pic_num - 1;
2823 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short-term list newest-first for a matching field */
2825 for(i= h->short_ref_count-1; i>=0; i--){
2826 ref = h->short_ref[i];
2827 assert(ref->reference);
2828 assert(!ref->long_ref);
2830 ref->frame_num == frame_num &&
2831 (ref->reference & pic_structure)
/* long-term: direct index into long_ref[] */
2839 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2841 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2844 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2847 ref = h->long_ref[long_idx];
2848 assert(!(ref && !ref->reference));
2849 if(ref && (ref->reference & pic_structure)){
2850 ref->pic_id= pic_id;
2851 assert(ref->long_ref);
2859 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2860 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert ref at 'index': find its old slot (or the list end), then
 * shift the intermediate entries up by one */
2862 for(i=index; i+1<h->ref_count[list]; i++){
2863 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2866 for(; i > index; i--){
2867 h->ref_list[list][i]= h->ref_list[list][i-1];
2869 h->ref_list[list][index]= *ref;
2871 pic_as_field(&h->ref_list[list][index], pic_structure);
2875 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: no list slot may be left without picture data */
2881 for(list=0; list<h->list_count; list++){
2882 for(index= 0; index < h->ref_count[list]; index++){
2883 if(!h->ref_list[list][index].data[0]){
2884 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2885 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2893 static void fill_mbaff_ref_list(H264Context *h){
/* For MBAFF decoding, derive per-field reference entries from each
 * frame reference: ref_list slots 16+2*i / 16+2*i+1 get top/bottom
 * field views of frame reference i (halved height via doubled
 * linesize, bottom field offset by one line), and the weighted-
 * prediction tables are replicated for the new field indices. */
2895 for(list=0; list<2; list++){ //FIXME try list_count
2896 for(i=0; i<h->ref_count[list]; i++){
2897 Picture *frame = &h->ref_list[list][i];
2898 Picture *field = &h->ref_list[list][16+2*i];
2901 field[0].linesize[j] <<= 1;
2902 field[0].reference = PICT_TOP_FIELD;
2903 field[0].poc= field[0].field_poc[0];
2904 field[1] = field[0];
2906 field[1].data[j] += frame->linesize[j];
2907 field[1].reference = PICT_BOTTOM_FIELD;
2908 field[1].poc= field[1].field_poc[1];
/* both fields of a frame share that frame's explicit weights */
2910 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2911 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2913 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2914 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed by (ref1, ref0); copy rows and columns
 * for the field entries */
2918 for(j=0; j<h->ref_count[1]; j++){
2919 for(i=0; i<h->ref_count[0]; i++)
2920 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2921 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2922 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2926 static int pred_weight_table(H264Context *h){
/* Parse pred_weight_table() from the slice header (explicit weighted
 * prediction): log2 weight denominators, then per reference and per
 * list an optional luma and chroma weight/offset pair; absent entries
 * default to the neutral weight (1<<denom) and offset 0. Sets
 * use_weight/use_weight_chroma when any non-default value appears.
 * For P slices only list 0 is parsed. */
2927 MpegEncContext * const s = &h->s;
2929 int luma_def, chroma_def;
2932 h->use_weight_chroma= 0;
2933 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2934 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2935 luma_def = 1<<h->luma_log2_weight_denom;
2936 chroma_def = 1<<h->chroma_log2_weight_denom;
2938 for(list=0; list<2; list++){
2939 for(i=0; i<h->ref_count[list]; i++){
2940 int luma_weight_flag, chroma_weight_flag;
2942 luma_weight_flag= get_bits1(&s->gb);
2943 if(luma_weight_flag){
2944 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2945 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2946 if( h->luma_weight[list][i] != luma_def
2947 || h->luma_offset[list][i] != 0)
2950 h->luma_weight[list][i]= luma_def;
2951 h->luma_offset[list][i]= 0;
2955 chroma_weight_flag= get_bits1(&s->gb);
2956 if(chroma_weight_flag){
/* j indexes the two chroma planes (Cb, Cr) */
2959 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2960 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2961 if( h->chroma_weight[list][i][j] != chroma_def
2962 || h->chroma_offset[list][i][j] != 0)
2963 h->use_weight_chroma= 1;
2968 h->chroma_weight[list][i][j]= chroma_def;
2969 h->chroma_offset[list][i][j]= 0;
2974 if(h->slice_type_nos != FF_B_TYPE) break;
2976 h->use_weight= h->use_weight || h->use_weight_chroma;
2980 static void implicit_weight_table(H264Context *h){
/* Compute implicit bi-prediction weights (H.264 8.4.2.3.2) from POC
 * distances: weight = 64 - dist_scale_factor, derived from td (POC
 * distance between the two refs) and tb (distance from ref0 to the
 * current picture), falling back to equal 32/32 weighting when the
 * scale factor leaves the allowed range or the refs are trivial. */
2981 MpegEncContext * const s = &h->s;
2983 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: weighting is a no-op, disable it */
2985 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2986 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2988 h->use_weight_chroma= 0;
2993 h->use_weight_chroma= 2;
2994 h->luma_log2_weight_denom= 5;
2995 h->chroma_log2_weight_denom= 5;
2997 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
2998 int poc0 = h->ref_list[0][ref0].poc;
2999 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3000 int poc1 = h->ref_list[1][ref1].poc;
3001 int td = av_clip(poc1 - poc0, -128, 127);
3003 int tb = av_clip(cur_poc - poc0, -128, 127);
3004 int tx = (16384 + (FFABS(td) >> 1)) / td;
3005 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3006 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3007 h->implicit_weight[ref0][ref1] = 32;
3009 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3011 h->implicit_weight[ref0][ref1] = 32;
3017 * Mark a picture as no longer needed for reference. The refmask
3018 * argument allows unreferencing of individual fields or the whole frame.
3019 * If the picture becomes entirely unreferenced, but is being held for
3020 * display purposes, it is marked as such.
3021 * @param refmask mask of fields to unreference; the mask is bitwise
3022 * anded with the reference marking of pic
3023 * @return non-zero if pic becomes entirely unreferenced (except possibly
3024 * for display purposes) zero if one of the fields remains in
3027 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep only the field reference bits that survive the mask; non-zero
 * means at least one field is still a reference */
3029 if (pic->reference &= refmask) {
/* fully unreferenced: if the picture is still queued for output,
 * keep it alive with the DELAYED_PIC_REF marker */
3032 for(i = 0; h->delayed_pic[i]; i++)
3033 if(pic == h->delayed_pic[i]){
3034 pic->reference=DELAYED_PIC_REF;
3042 * instantaneous decoder refresh.
/* Perform an instantaneous decoder refresh: drop every long-term and
 * short-term reference and reset the frame_num prediction state. */
3044 static void idr(H264Context *h){
3047 for(i=0; i<16; i++){
3048 remove_long(h, i, 0);
3050 assert(h->long_ref_count==0);
3052 for(i=0; i<h->short_ref_count; i++){
3053 unreference_pic(h, h->short_ref[i], 0);
3054 h->short_ref[i]= NULL;
3056 h->short_ref_count=0;
3057 h->prev_frame_num= 0;
3058 h->prev_frame_num_offset= 0;
3063 /* forget old pics after a seek */
3064 static void flush_dpb(AVCodecContext *avctx){
3065 H264Context *h= avctx->priv_data;
3067 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3068 if(h->delayed_pic[i])
3069 h->delayed_pic[i]->reference= 0;
3070 h->delayed_pic[i]= NULL;
3072 h->outputed_poc= INT_MIN;
3074 if(h->s.current_picture_ptr)
3075 h->s.current_picture_ptr->reference= 0;
3076 h->s.first_field= 0;
3077 ff_mpeg_flush(avctx);
3081 * Find a Picture in the short term reference list by frame number.
3082 * @param frame_num frame number to search for
3083 * @param idx the index into h->short_ref where returned picture is found
3084 * undefined if no picture found.
3085 * @return pointer to the found picture, or NULL if no pic with the provided
3086 * frame number is found
3088 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3089 MpegEncContext * const s = &h->s;
/* linear scan of the short-term list; on a hit *idx receives the
 * position (return statement elided from this excerpt) */
3092 for(i=0; i<h->short_ref_count; i++){
3093 Picture *pic= h->short_ref[i];
3094 if(s->avctx->debug&FF_DEBUG_MMCO)
3095 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3096 if(pic->frame_num == frame_num) {
3105 * Remove a picture from the short term reference list by its index in
3106 * that list. This does no checking on the provided index; it is assumed
3107 * to be valid. Other list entries are shifted down.
3108 * @param i index into h->short_ref of picture to remove.
3110 static void remove_short_at_index(H264Context *h, int i){
3111 assert(i >= 0 && i < h->short_ref_count);
3112 h->short_ref[i]= NULL;
/* close the gap by shifting the remaining entries down one slot */
3113 if (--h->short_ref_count)
3114 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3119 * @return the removed picture or NULL if an error occurs
3121 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3122 MpegEncContext * const s = &h->s;
3126 if(s->avctx->debug&FF_DEBUG_MMCO)
3127 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
/* only drop the list entry once the picture is fully unreferenced
 * (unreference_pic may keep a field referenced via ref_mask) */
3129 pic = find_short(h, frame_num, &i);
3131 if(unreference_pic(h, pic, ref_mask))
3132 remove_short_at_index(h, i);
3139 * Remove a picture from the long term reference list by its index in
3141 * @return the removed picture or NULL if an error occurs
3143 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3146 pic= h->long_ref[i];
/* clear the slot and bookkeeping only when the picture becomes
 * entirely unreferenced under the given field mask */
3148 if(unreference_pic(h, pic, ref_mask)){
3149 assert(h->long_ref[i]->long_ref == 1);
3150 h->long_ref[i]->long_ref= 0;
3151 h->long_ref[i]= NULL;
3152 h->long_ref_count--;
3160 * print short term list
/* Debug helper: dump the short-term reference list when MMCO debug is on. */
3162 static void print_short_term(H264Context *h) {
3164 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3165 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3166 for(i=0; i<h->short_ref_count; i++){
3167 Picture *pic= h->short_ref[i];
3168 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3174 * print long term list
/* Debug helper: dump all 16 long-term reference slots when MMCO debug is on. */
3176 static void print_long_term(H264Context *h) {
3178 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3179 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3180 for(i = 0; i < 16; i++){
3181 Picture *pic= h->long_ref[i];
3183 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3190 * Executes the reference picture marking (memory management control operations).
/* Apply the decoded MMCO (memory management control operation) list to the
 * short-term and long-term reference lists, then handle the implicit
 * sliding-window / second-field bookkeeping and overflow recovery.
 * NOTE(review): this excerpt is missing source lines (embedded numbering
 * is non-contiguous); several branch bodies and case epilogues are elided. */
3192 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3193 MpegEncContext * const s = &h->s;
3195 int current_ref_assigned=0;
3198 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3199 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3201 for(i=0; i<mmco_count; i++){
3202 int structure, frame_num;
3203 if(s->avctx->debug&FF_DEBUG_MMCO)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* resolve the short-term target once for the two opcodes that need it */
3206 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3207 || mmco[i].opcode == MMCO_SHORT2LONG){
3208 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3209 pic = find_short(h, frame_num, &j);
3211 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3212 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3213 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3218 switch(mmco[i].opcode){
3219 case MMCO_SHORT2UNUSED:
3220 if(s->avctx->debug&FF_DEBUG_MMCO)
3221 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3222 remove_short(h, frame_num, structure ^ PICT_FRAME);
3224 case MMCO_SHORT2LONG:
/* evict whatever currently occupies the long-term slot, then move
 * the picture from the short list into it */
3225 if (h->long_ref[mmco[i].long_arg] != pic)
3226 remove_long(h, mmco[i].long_arg, 0);
3228 remove_short_at_index(h, j);
3229 h->long_ref[ mmco[i].long_arg ]= pic;
3230 if (h->long_ref[ mmco[i].long_arg ]){
3231 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3232 h->long_ref_count++;
3235 case MMCO_LONG2UNUSED:
3236 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3237 pic = h->long_ref[j];
3239 remove_long(h, j, structure ^ PICT_FRAME);
3240 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3241 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3244 // Comment below left from previous code as it is an interesting note.
3245 /* First field in pair is in short term list or
3246 * at a different long term index.
3247 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3248 * Report the problem and keep the pair where it is,
3249 * and mark this field valid.
/* MMCO_LONG (elided case label): mark the current picture long-term */
3252 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3253 remove_long(h, mmco[i].long_arg, 0);
3255 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3256 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3257 h->long_ref_count++;
3260 s->current_picture_ptr->reference |= s->picture_structure;
3261 current_ref_assigned=1;
3263 case MMCO_SET_MAX_LONG:
3264 assert(mmco[i].long_arg <= 16);
3265 // just remove the long term which index is greater than new max
3266 for(j = mmco[i].long_arg; j<16; j++){
3267 remove_long(h, j, 0);
/* MMCO_RESET (elided case label, presumably): full reference reset */
3271 while(h->short_ref_count){
3272 remove_short(h, h->short_ref[0]->frame_num, 0);
3274 for(j = 0; j < 16; j++) {
3275 remove_long(h, j, 0);
3277 s->current_picture_ptr->poc=
3278 s->current_picture_ptr->field_poc[0]=
3279 s->current_picture_ptr->field_poc[1]=
3283 s->current_picture_ptr->frame_num= 0;
3289 if (!current_ref_assigned) {
3290 /* Second field of complementary field pair; the first field of
3291 * which is already referenced. If short referenced, it
3292 * should be first entry in short_ref. If not, it must exist
3293 * in long_ref; trying to put it on the short list here is an
3294 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3296 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3297 /* Just mark the second field valid */
3298 s->current_picture_ptr->reference = PICT_FRAME;
3299 } else if (s->current_picture_ptr->long_ref) {
3300 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3301 "assignment for second field "
3302 "in complementary field pair "
3303 "(first field is long term)\n");
3305 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3307 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture at short_ref[0] (most recent first) */
3310 if(h->short_ref_count)
3311 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3313 h->short_ref[0]= s->current_picture_ptr;
3314 h->short_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3319 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3321 /* We have too many reference frames, probably due to corrupted
3322 * stream. Need to discard one frame. Prevents overrun of the
3323 * short_ref and long_ref buffers.
3325 av_log(h->s.avctx, AV_LOG_ERROR,
3326 "number of reference frames exceeds max (probably "
3327 "corrupt input), discarding one\n");
3329 if (h->long_ref_count && !h->short_ref_count) {
3330 for (i = 0; i < 16; ++i)
3335 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference (elided else) */
3337 pic = h->short_ref[h->short_ref_count - 1];
3338 remove_short(h, pic->frame_num, 0);
3342 print_short_term(h);
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[]:
 * either the IDR flags, an explicit adaptive MMCO list, or (implicitly)
 * a sliding-window removal of the oldest short-term reference.
 * NOTE(review): excerpt is missing lines; some branches are elided. */
3347 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3348 MpegEncContext * const s = &h->s;
3352 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped to broken_link convention */
3353 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag (elided read): keep IDR as long-term 0 */
3355 h->mmco[0].opcode= MMCO_LONG;
3356 h->mmco[0].long_arg= 0;
3360 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3361 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3362 MMCOOpcode opcode= get_ue_golomb(gb);
3364 h->mmco[i].opcode= opcode;
3365 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3366 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3367 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3368 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3372 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3373 unsigned int long_arg= get_ue_golomb(gb);
/* long index limit is doubled for field pictures (per-field indices) */
3374 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3375 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3378 h->mmco[i].long_arg= long_arg;
3381 if(opcode > (unsigned)MMCO_LONG){
3382 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3385 if(opcode == MMCO_END)
/* sliding window mode (elided else): emit an implicit removal of the
 * oldest short-term ref once the DPB reference budget is full */
3390 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3392 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3393 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3394 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3395 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3397 if (FIELD_PICTURE) {
/* field pictures: unreference both fields (pic_num = 2*frame_num{,+1}) */
3398 h->mmco[0].short_pic_num *= 2;
3399 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3400 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Compute the picture order count (POC) for the current picture using
 * whichever of the three POC types (0, 1, 2) the SPS selects, and store
 * the per-field and frame POC values into the current Picture.
 * NOTE(review): excerpt is missing lines; several branches are elided. */
3410 static int init_poc(H264Context *h){
3411 MpegEncContext * const s = &h->s;
3412 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3414 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture -> bump the offset */
3416 h->frame_num_offset= h->prev_frame_num_offset;
3417 if(h->frame_num < h->prev_frame_num)
3418 h->frame_num_offset += max_frame_num;
3420 if(h->sps.poc_type==0){
3421 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* reconstruct poc_msb from the lsb wrap direction (spec 8.2.1.1) */
3423 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3424 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3425 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3426 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3428 h->poc_msb = h->prev_poc_msb;
3429 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3431 field_poc[1] = h->poc_msb + h->poc_lsb;
3432 if(s->picture_structure == PICT_FRAME)
3433 field_poc[1] += h->delta_poc_bottom;
3434 }else if(h->sps.poc_type==1){
3435 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3438 if(h->sps.poc_cycle_length != 0)
3439 abs_frame_num = h->frame_num_offset + h->frame_num;
3443 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3446 expected_delta_per_poc_cycle = 0;
3447 for(i=0; i < h->sps.poc_cycle_length; i++)
3448 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3450 if(abs_frame_num > 0){
3451 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3452 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3454 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3455 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3456 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3460 if(h->nal_ref_idc == 0)
3461 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3463 field_poc[0] = expectedpoc + h->delta_poc[0];
3464 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3466 if(s->picture_structure == PICT_FRAME)
3467 field_poc[1] += h->delta_poc[1];
/* poc_type==2 (elided else): POC derived directly from frame_num */
3469 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the field POCs that this picture structure actually codes */
3478 if(s->picture_structure != PICT_BOTTOM_FIELD)
3479 s->current_picture_ptr->field_poc[0]= field_poc[0];
3480 if(s->picture_structure != PICT_TOP_FIELD)
3481 s->current_picture_ptr->field_poc[1]= field_poc[1];
3482 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3489 * initialize scan tables
3491 static void init_scan_tables(H264Context *h){
3492 MpegEncContext * const s = &h->s;
/* when a non-C IDCT with a permuted coefficient layout is in use, the
 * scan tables must be permuted to match; the C IDCT needs none */
3494 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3495 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3496 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3498 for(i=0; i<16; i++){
3499 #define T(x) (x>>2) | ((x<<2) & 0xF)
3500 h->zigzag_scan[i] = T(zigzag_scan[i]);
3501 h-> field_scan[i] = T( field_scan[i]);
3505 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3506 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3507 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3508 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3509 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3511 for(i=0; i<64; i++){
3512 #define T(x) (x>>3) | ((x&7)<<3)
3513 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3514 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3515 h->field_scan8x8[i] = T(field_scan8x8[i]);
3516 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* qp==0 (lossless transform-bypass) blocks always use the unpermuted
 * scans, so keep separate _q0 pointers for that case */
3520 if(h->sps.transform_bypass){ //FIXME same ugly
3521 h->zigzag_scan_q0 = zigzag_scan;
3522 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3523 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3524 h->field_scan_q0 = field_scan;
3525 h->field_scan8x8_q0 = field_scan8x8;
3526 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3528 h->zigzag_scan_q0 = h->zigzag_scan;
3529 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3530 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3531 h->field_scan_q0 = h->field_scan;
3532 h->field_scan8x8_q0 = h->field_scan8x8;
3533 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3538 * Replicates H264 "master" context to thread contexts.
3540 static void clone_slice(H264Context *dst, H264Context *src)
/* shallow-copy the per-frame state a slice thread needs; pointers to
 * Pictures are shared (not deep-copied), which is safe because threads
 * only read the reference lists during slice decoding */
3542 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3543 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3544 dst->s.current_picture = src->s.current_picture;
3545 dst->s.linesize = src->s.linesize;
3546 dst->s.uvlinesize = src->s.uvlinesize;
3547 dst->s.first_field = src->s.first_field;
3549 dst->prev_poc_msb = src->prev_poc_msb;
3550 dst->prev_poc_lsb = src->prev_poc_lsb;
3551 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3552 dst->prev_frame_num = src->prev_frame_num;
3553 dst->short_ref_count = src->short_ref_count;
3555 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3556 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3557 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3558 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3560 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3561 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3565 * decodes a slice header.
3566 * This will also call MPV_common_init() and frame_start() as needed.
3568 * @param h h264context
3569 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3571 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* Parse a slice header and set up all per-slice decoder state: PPS/SPS
 * selection, geometry (re)initialization, field pairing, POC, reference
 * list construction/reordering, weighted prediction, ref pic marking,
 * QP and deblocking parameters.
 * NOTE(review): this excerpt is missing many source lines (the embedded
 * numbering is non-contiguous); branch bodies, error returns and braces
 * are elided throughout. Comments below only describe what is visible. */
3573 static int decode_slice_header(H264Context *h, H264Context *h0){
3574 MpegEncContext * const s = &h->s;
3575 MpegEncContext * const s0 = &h0->s;
3576 unsigned int first_mb_in_slice;
3577 unsigned int pps_id;
3578 int num_ref_idx_active_override_flag;
3579 unsigned int slice_type, tmp, i, j;
3580 int default_ref_list_done = 0;
3581 int last_pic_structure;
/* non-reference pictures may be dropped without harming later decode */
3583 s->dropable= h->nal_ref_idc == 0;
3585 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3586 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3587 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3589 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3590 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3593 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first MB index 0 marks the start of a new picture in CHUNKS mode */
3595 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3596 h0->current_slice = 0;
3597 if (!s0->first_field)
3598 s->current_picture_ptr= NULL;
3601 slice_type= get_ue_golomb(&s->gb);
3603 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed for the whole picture" (elided check) */
3608 h->slice_type_fixed=1;
3610 h->slice_type_fixed=0;
3612 slice_type= golomb_to_pict_type[ slice_type ];
/* the default reference list only needs rebuilding when the slice type
 * changed relative to the previous slice of this picture */
3613 if (slice_type == FF_I_TYPE
3614 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3615 default_ref_list_done = 1;
3617 h->slice_type= slice_type;
3618 h->slice_type_nos= slice_type & 3;
3620 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3621 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3622 av_log(h->s.avctx, AV_LOG_ERROR,
3623 "B picture before any references, skipping\n");
3627 pps_id= get_ue_golomb(&s->gb);
3628 if(pps_id>=MAX_PPS_COUNT){
3629 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3632 if(!h0->pps_buffers[pps_id]) {
3633 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3636 h->pps= *h0->pps_buffers[pps_id];
3638 if(!h0->sps_buffers[h->pps.sps_id]) {
3639 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3642 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS scaling lists; rebuild on change */
3644 if(h == h0 && h->dequant_coeff_pps != pps_id){
3645 h->dequant_coeff_pps = pps_id;
3646 init_dequant_tables(h);
3649 s->mb_width= h->sps.mb_width;
3650 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3652 h->b_stride= s->mb_width*4;
3653 h->b8_stride= s->mb_width*2;
/* apply SPS cropping to derive the display dimensions */
3655 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3656 if(h->sps.frame_mbs_only_flag)
3657 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3659 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3661 if (s->context_initialized
3662 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3664 return -1; // width / height changed during parallelized decoding
3666 flush_dpb(s->avctx);
3669 if (!s->context_initialized) {
3671 return -1; // we can't (re-)initialize context during parallel decoding
3672 if (MPV_common_init(s) < 0)
3676 init_scan_tables(h);
/* give each slice thread its own H264Context cloned from the master */
3679 for(i = 1; i < s->avctx->thread_count; i++) {
3681 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3682 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3683 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3686 init_scan_tables(c);
3690 for(i = 0; i < s->avctx->thread_count; i++)
3691 if(context_init(h->thread_context[i]) < 0)
3694 s->avctx->width = s->width;
3695 s->avctx->height = s->height;
3696 s->avctx->sample_aspect_ratio= h->sps.sar;
3697 if(!s->avctx->sample_aspect_ratio.den)
3698 s->avctx->sample_aspect_ratio.den = 1;
3700 if(h->sps.timing_info_present_flag){
3701 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around old x264 builds that wrote a halved time_scale */
3702 if(h->x264_build > 0 && h->x264_build < 44)
3703 s->avctx->time_base.den *= 2;
3704 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3705 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3709 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3712 h->mb_aff_frame = 0;
3713 last_pic_structure = s0->picture_structure;
3714 if(h->sps.frame_mbs_only_flag){
3715 s->picture_structure= PICT_FRAME;
3717 if(get_bits1(&s->gb)) { //field_pic_flag
3718 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3720 s->picture_structure= PICT_FRAME;
3721 h->mb_aff_frame = h->sps.mb_aff;
3724 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3726 if(h0->current_slice == 0){
/* conceal gaps in frame_num by synthesizing the skipped frames */
3727 while(h->frame_num != h->prev_frame_num &&
3728 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3729 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3731 h->prev_frame_num++;
3732 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3733 s->current_picture_ptr->frame_num= h->prev_frame_num;
3734 execute_ref_pic_marking(h, NULL, 0);
3737 /* See if we have a decoded first field looking for a pair... */
3738 if (s0->first_field) {
3739 assert(s0->current_picture_ptr);
3740 assert(s0->current_picture_ptr->data[0]);
3741 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3743 /* figure out if we have a complementary field pair */
3744 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3746 * Previous field is unmatched. Don't display it, but let it
3747 * remain for reference if marked as such.
3749 s0->current_picture_ptr = NULL;
3750 s0->first_field = FIELD_PICTURE;
3753 if (h->nal_ref_idc &&
3754 s0->current_picture_ptr->reference &&
3755 s0->current_picture_ptr->frame_num != h->frame_num) {
3757 * This and previous field were reference, but had
3758 * different frame_nums. Consider this field first in
3759 * pair. Throw away previous field except for reference
3762 s0->first_field = 1;
3763 s0->current_picture_ptr = NULL;
3766 /* Second field in complementary pair */
3767 s0->first_field = 0;
3772 /* Frame or first field in a potentially complementary pair */
3773 assert(!s0->current_picture_ptr);
3774 s0->first_field = FIELD_PICTURE;
3777 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3778 s0->first_field = 0;
3785 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3787 assert(s->mb_num == s->mb_width * s->mb_height);
3788 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3789 first_mb_in_slice >= s->mb_num){
3790 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3793 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3794 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3795 if (s->picture_structure == PICT_BOTTOM_FIELD)
3796 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3797 assert(s->mb_y < s->mb_height);
/* pic_num space is doubled (+parity bit) for field pictures */
3799 if(s->picture_structure==PICT_FRAME){
3800 h->curr_pic_num= h->frame_num;
3801 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3803 h->curr_pic_num= 2*h->frame_num + 1;
3804 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3807 if(h->nal_unit_type == NAL_IDR_SLICE){
3808 get_ue_golomb(&s->gb); /* idr_pic_id */
3811 if(h->sps.poc_type==0){
3812 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3814 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3815 h->delta_poc_bottom= get_se_golomb(&s->gb);
3819 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3820 h->delta_poc[0]= get_se_golomb(&s->gb);
3822 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3823 h->delta_poc[1]= get_se_golomb(&s->gb);
3828 if(h->pps.redundant_pic_cnt_present){
3829 h->redundant_pic_count= get_ue_golomb(&s->gb);
3832 //set defaults, might be overridden a few lines later
3833 h->ref_count[0]= h->pps.ref_count[0];
3834 h->ref_count[1]= h->pps.ref_count[1];
3836 if(h->slice_type_nos != FF_I_TYPE){
3837 if(h->slice_type_nos == FF_B_TYPE){
3838 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3840 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3842 if(num_ref_idx_active_override_flag){
3843 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3844 if(h->slice_type_nos==FF_B_TYPE)
3845 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned compare also catches ref_count==0 wrapping to UINT_MAX */
3847 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3848 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3849 h->ref_count[0]= h->ref_count[1]= 1;
3853 if(h->slice_type_nos == FF_B_TYPE)
3860 if(!default_ref_list_done){
3861 fill_default_ref_list(h);
3864 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3867 if(h->slice_type_nos!=FF_I_TYPE){
3868 s->last_picture_ptr= &h->ref_list[0][0];
3869 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3871 if(h->slice_type_nos==FF_B_TYPE){
3872 s->next_picture_ptr= &h->ref_list[1][0];
3873 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3876 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3877 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3878 pred_weight_table(h);
3879 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3880 implicit_weight_table(h);
/* ref pic marking runs on the master context only (elided condition) */
3885 decode_ref_pic_marking(h0, &s->gb);
3888 fill_mbaff_ref_list(h);
3890 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3891 direct_dist_scale_factor(h);
3892 direct_ref_list_init(h);
3894 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3895 tmp = get_ue_golomb(&s->gb);
3897 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3900 h->cabac_init_idc= tmp;
3903 h->last_qscale_diff = 0;
3904 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3906 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3910 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3911 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3912 //FIXME qscale / qp ... stuff
3913 if(h->slice_type == FF_SP_TYPE){
3914 get_bits1(&s->gb); /* sp_for_switch_flag */
3916 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3917 get_se_golomb(&s->gb); /* slice_qs_delta */
3920 h->deblocking_filter = 1;
3921 h->slice_alpha_c0_offset = 0;
3922 h->slice_beta_offset = 0;
3923 if( h->pps.deblocking_filter_parameters_present ) {
3924 tmp= get_ue_golomb(&s->gb);
3926 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc (0=on,1=off) -> internal flag (1=on,0=off) */
3929 h->deblocking_filter= tmp;
3930 if(h->deblocking_filter < 2)
3931 h->deblocking_filter^= 1; // 1<->0
3933 if( h->deblocking_filter ) {
3934 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3935 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3939 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3940 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3941 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3942 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3943 h->deblocking_filter= 0;
/* full deblocking crosses slice boundaries, which breaks slice
 * parallelism; either cheat (type 2) or fall back to 1 context */
3945 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3946 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3947 /* Cheat slightly for speed:
3948 Do not bother to deblock across slices. */
3949 h->deblocking_filter = 2;
3951 h0->max_contexts = 1;
3952 if(!h0->single_decode_warning) {
3953 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3954 h0->single_decode_warning = 1;
3957 return 1; // deblocking switched inside frame
/* unfinished FMO support; presumably inside an #if 0 block in the full
 * source — the '?' bit count is a deliberate placeholder */
3962 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3963 slice_group_change_cycle= get_bits(&s->gb, ?);
3966 h0->last_slice_type = slice_type;
3967 h->slice_num = ++h0->current_slice;
3968 if(h->slice_num >= MAX_SLICES){
3969 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* precompute ref index -> frame number mapping used by deblocking */
3973 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3977 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3978 +(h->ref_list[j][i].reference&3);
3981 for(i=16; i<48; i++)
3982 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3983 +(h->ref_list[j][i].reference&3);
3986 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3987 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3989 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3990 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3992 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3994 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3995 pps_id, h->frame_num,
3996 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3997 h->ref_count[0], h->ref_count[1],
3999 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4001 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4002 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read a CAVLC level_prefix: the count of leading zero bits before the
 * first 1 in the bitstream cache (returned value presumably log-1;
 * the return statement is elided from this excerpt). */
4012 static inline int get_level_prefix(GetBitContext *gb){
4016 OPEN_READER(re, gb);
4017 UPDATE_CACHE(re, gb);
4018 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB side of the 32-bit cache */
4020 log= 32 - av_log2(buf);
4022 print_bin(buf>>(32-log), log);
4023 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4026 LAST_SKIP_BITS(re, gb, log);
4027 CLOSE_READER(re, gb);
/* An 8x8 transform is only allowed when no 8x8 sub-block is further
 * partitioned; with direct_8x8_inference DIRECT sub-blocks don't count.
 * The multiply by 0x0001000100010001ULL replicates the type mask across
 * all four packed 16-bit sub_mb_type entries. */
4032 static inline int get_dct8x8_allowed(H264Context *h){
4033 if(h->sps.direct_8x8_inference_flag)
4034 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4036 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4040 * decodes a residual block.
4041 * @param n block index
4042 * @param scantable scantable
4043 * @param max_coeff number of coefficients in the block
4044 * @return <0 if an error occurred
/* CAVLC residual decoding: coeff_token (total_coeff + trailing_ones), then
 * level values, then total_zeros and run_before, per H.264 clause 9.2.
 * NOTE(review): several branch/brace lines are elided from this listing;
 * comments below are hedged accordingly. */
4046 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4047 MpegEncContext * const s = &h->s;
4048 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4050 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4052 //FIXME put trailing_ones into the context
/* coeff_token selection: chroma DC has its own VLC; luma DC and regular
 * blocks pick a VLC table from the predicted non-zero count of neighbours. */
4054 if(n == CHROMA_DC_BLOCK_INDEX){
4055 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4056 total_coeff= coeff_token>>2;
4058 if(n == LUMA_DC_BLOCK_INDEX){
4059 total_coeff= pred_non_zero_count(h, 0);
4060 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4061 total_coeff= coeff_token>>2;
4063 total_coeff= pred_non_zero_count(h, n);
4064 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4065 total_coeff= coeff_token>>2;
4066 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4070 //FIXME set last_non_zero?
/* bitstream sanity check: more coefficients than the block holds */
4074 if(total_coeff > (unsigned)max_coeff) {
4075 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4079 trailing_ones= coeff_token&3;
4080 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4081 assert(total_coeff<=16);
/* trailing ones: up to 3 sign bits, peeked then skipped; maps sign bit to +1/-1 */
4083 i = show_bits(gb, 3);
4084 skip_bits(gb, trailing_ones);
4085 level[0] = 1-((i&4)>>1);
4086 level[1] = 1-((i&2) );
4087 level[2] = 1-((i&1)<<1);
4089 if(trailing_ones<total_coeff) {
4090 int level_code, mask;
4091 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4092 int prefix= get_level_prefix(gb);
4094 //first coefficient has suffix_length equal to 0 or 1
4095 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4097 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4099 level_code= (prefix<<suffix_length); //part
4100 }else if(prefix==14){
4102 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4104 level_code= prefix + get_bits(gb, 4); //part
4106 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4107 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4109 level_code += (1<<(prefix-3))-4096;
4112 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level: even -> positive, odd -> negative */
4117 mask= -(level_code&1);
4118 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4120 //remaining coefficients have suffix_length > 0
4121 for(i=trailing_ones+1;i<total_coeff;i++) {
4122 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4123 prefix = get_level_prefix(gb);
4125 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4127 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4129 level_code += (1<<(prefix-3))-4096;
4131 mask= -(level_code&1);
4132 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adapt suffix_length upward as decoded magnitudes grow (elided increment
 * presumably follows — TODO confirm against full source) */
4133 if(level_code > suffix_limit[suffix_length])
/* total_zeros: skipped when the block is full, otherwise VLC-coded */
4138 if(total_coeff == max_coeff)
4141 if(n == CHROMA_DC_BLOCK_INDEX)
4142 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4144 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter levels into the block in reverse scan order; first path is the
 * qmul==NULL (no dequant) variant, second applies (level*qmul[j]+32)>>6 */
4147 coeff_num = zeros_left + total_coeff - 1;
4148 j = scantable[coeff_num];
4150 block[j] = level[0];
4151 for(i=1;i<total_coeff;i++) {
4154 else if(zeros_left < 7){
4155 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4157 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4159 zeros_left -= run_before;
4160 coeff_num -= 1 + run_before;
4161 j= scantable[ coeff_num ];
4166 block[j] = (level[0] * qmul[j] + 32)>>6;
4167 for(i=1;i<total_coeff;i++) {
4170 else if(zeros_left < 7){
4171 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4173 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4175 zeros_left -= run_before;
4176 coeff_num -= 1 + run_before;
4177 j= scantable[ coeff_num ];
4179 block[j]= (level[i] * qmul[j] + 32)>>6;
4184 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts the MBAFF field decoding flag for a skipped MB pair from the left
 * neighbour if it belongs to the same slice, else from the top neighbour.
 * NOTE(review): the fallback value when neither neighbour is available is on
 * an elided line (presumably 0). */
4191 static void predict_field_decoding_flag(H264Context *h){
4192 MpegEncContext * const s = &h->s;
4193 const int mb_xy= h->mb_xy;
4194 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4195 ? s->current_picture.mb_type[mb_xy-1]
4196 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4197 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4199 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4203 * decodes a P_SKIP or B_SKIP macroblock
/* Fills caches and picture-level tables for a skipped MB: no residual
 * (non-zero counts zeroed), motion from direct prediction (B) or the P-skip
 * predictor (P), then writes back motion and marks the MB skipped. */
4205 static void decode_mb_skip(H264Context *h){
4206 MpegEncContext * const s = &h->s;
4207 const int mb_xy= h->mb_xy;
4210 memset(h->non_zero_count[mb_xy], 0, 16);
4211 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4214 mb_type|= MB_TYPE_INTERLACED;
4216 if( h->slice_type_nos == FF_B_TYPE )
4218 // just for fill_caches. pred_direct_motion will set the real mb_type
4219 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4221 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4222 pred_direct_motion(h, &mb_type);
4223 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path (else branch elided in this listing): 16x16 list-0 prediction */
4228 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4230 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4231 pred_pskip_motion(h, &mx, &my);
4232 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4233 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4236 write_back_motion(h, mb_type);
4237 s->current_picture.mb_type[mb_xy]= mb_type;
4238 s->current_picture.qscale_table[mb_xy]= s->qscale;
4239 h->slice_table[ mb_xy ]= h->slice_num;
4240 h->prev_mb_skipped= 1;
4244 * decodes a macroblock
4245 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock layer: skip-run handling, mb_type, intra/inter prediction
 * info, CBP, delta-QP and residuals.  NOTE(review): this listing elides many
 * lines (else branches, braces, error returns); section comments below mark
 * the visible structure only. */
4247 static int decode_mb_cavlc(H264Context *h){
4248 MpegEncContext * const s = &h->s;
4250 int partition_count;
4251 unsigned int mb_type, cbp;
4252 int dct8x8_allowed= h->pps.transform_8x8_mode;
4254 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4256 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4257 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run: read once per run, consume one skipped MB per call --- */
4259 if(h->slice_type_nos != FF_I_TYPE){
4260 if(s->mb_skip_run==-1)
4261 s->mb_skip_run= get_ue_golomb(&s->gb);
4263 if (s->mb_skip_run--) {
4264 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4265 if(s->mb_skip_run==0)
4266 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4268 predict_field_decoding_flag(h);
4275 if( (s->mb_y&1) == 0 )
4276 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4279 h->prev_mb_skipped= 0;
/* --- mb_type: slice-type dependent lookup tables --- */
4281 mb_type= get_ue_golomb(&s->gb);
4282 if(h->slice_type_nos == FF_B_TYPE){
4284 partition_count= b_mb_type_info[mb_type].partition_count;
4285 mb_type= b_mb_type_info[mb_type].type;
4288 goto decode_intra_mb;
4290 }else if(h->slice_type_nos == FF_P_TYPE){
4292 partition_count= p_mb_type_info[mb_type].partition_count;
4293 mb_type= p_mb_type_info[mb_type].type;
4296 goto decode_intra_mb;
4299 assert(h->slice_type_nos == FF_I_TYPE);
4300 if(h->slice_type == FF_SI_TYPE && mb_type)
4304 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4308 cbp= i_mb_type_info[mb_type].cbp;
4309 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4310 mb_type= i_mb_type_info[mb_type].type;
4314 mb_type |= MB_TYPE_INTERLACED;
4316 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples, byte-aligned --- */
4318 if(IS_INTRA_PCM(mb_type)){
4321 // We assume these blocks are very rare so we do not optimize it.
4322 align_get_bits(&s->gb);
4324 // The pixels are stored in the same order as levels in h->mb array.
4325 for(x=0; x < (CHROMA ? 384 : 256); x++){
4326 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4329 // In deblocking, the quantizer is 0
4330 s->current_picture.qscale_table[mb_xy]= 0;
4331 // All coeffs are present
4332 memset(h->non_zero_count[mb_xy], 16, 16);
4334 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: ref counts are doubled while decoding a field MB (restored at end) */
4339 h->ref_count[0] <<= 1;
4340 h->ref_count[1] <<= 1;
4343 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4346 if(IS_INTRA(mb_type)){
4348 // init_top_left_availability(h);
4349 if(IS_INTRA4x4(mb_type)){
4352 if(dct8x8_allowed && get_bits1(&s->gb)){
4353 mb_type |= MB_TYPE_8x8DCT;
4357 // fill_intra4x4_pred_table(h);
4358 for(i=0; i<16; i+=di){
4359 int mode= pred_intra_mode(h, i);
4361 if(!get_bits1(&s->gb)){
4362 const int rem_mode= get_bits(&s->gb, 3);
4363 mode = rem_mode + (rem_mode >= mode);
4367 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4369 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4371 write_back_intra_pred_mode(h);
4372 if( check_intra4x4_pred_mode(h) < 0)
4375 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4376 if(h->intra16x16_pred_mode < 0)
4380 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4383 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions: sub_mb_type, refs and MVs per sub-block --- */
4385 }else if(partition_count==4){
4386 int i, j, sub_partition_count[4], list, ref[2][4];
4388 if(h->slice_type_nos == FF_B_TYPE){
4390 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4391 if(h->sub_mb_type[i] >=13){
4392 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4395 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4396 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4398 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4399 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4400 pred_direct_motion(h, &mb_type);
4401 h->ref_cache[0][scan8[4]] =
4402 h->ref_cache[1][scan8[4]] =
4403 h->ref_cache[0][scan8[12]] =
4404 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4407 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4409 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4410 if(h->sub_mb_type[i] >=4){
4411 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4414 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4415 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list, validated against the active ref count */
4419 for(list=0; list<h->list_count; list++){
4420 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4422 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4423 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4424 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4426 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4438 dct8x8_allowed = get_dct8x8_allowed(h);
4440 for(list=0; list<h->list_count; list++){
4442 if(IS_DIRECT(h->sub_mb_type[i])) {
4443 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4446 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4447 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4449 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4450 const int sub_mb_type= h->sub_mb_type[i];
4451 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4452 for(j=0; j<sub_partition_count[i]; j++){
4454 const int index= 4*i + block_width*j;
4455 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4456 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4457 mx += get_se_golomb(&s->gb);
4458 my += get_se_golomb(&s->gb);
4459 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into the cache cells covered by the sub-partition shape */
4461 if(IS_SUB_8X8(sub_mb_type)){
4463 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4465 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4466 }else if(IS_SUB_8X4(sub_mb_type)){
4467 mv_cache[ 1 ][0]= mx;
4468 mv_cache[ 1 ][1]= my;
4469 }else if(IS_SUB_4X8(sub_mb_type)){
4470 mv_cache[ 8 ][0]= mx;
4471 mv_cache[ 8 ][1]= my;
4473 mv_cache[ 0 ][0]= mx;
4474 mv_cache[ 0 ][1]= my;
4477 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- direct 16x16 --- */
4483 }else if(IS_DIRECT(mb_type)){
4484 pred_direct_motion(h, &mb_type);
4485 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions: refs then MVs --- */
4487 int list, mx, my, i;
4488 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4489 if(IS_16X16(mb_type)){
4490 for(list=0; list<h->list_count; list++){
4492 if(IS_DIR(mb_type, 0, list)){
4493 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4494 if(val >= h->ref_count[list]){
4495 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4499 val= LIST_NOT_USED&0xFF;
4500 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4502 for(list=0; list<h->list_count; list++){
4504 if(IS_DIR(mb_type, 0, list)){
4505 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4506 mx += get_se_golomb(&s->gb);
4507 my += get_se_golomb(&s->gb);
4508 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4510 val= pack16to32(mx,my);
4513 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4516 else if(IS_16X8(mb_type)){
4517 for(list=0; list<h->list_count; list++){
4520 if(IS_DIR(mb_type, i, list)){
4521 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4522 if(val >= h->ref_count[list]){
4523 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4527 val= LIST_NOT_USED&0xFF;
4528 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4531 for(list=0; list<h->list_count; list++){
4534 if(IS_DIR(mb_type, i, list)){
4535 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4536 mx += get_se_golomb(&s->gb);
4537 my += get_se_golomb(&s->gb);
4538 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4540 val= pack16to32(mx,my);
4543 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4547 assert(IS_8X16(mb_type));
4548 for(list=0; list<h->list_count; list++){
4551 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4552 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4553 if(val >= h->ref_count[list]){
4554 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4558 val= LIST_NOT_USED&0xFF;
4559 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4562 for(list=0; list<h->list_count; list++){
4565 if(IS_DIR(mb_type, i, list)){
4566 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4567 mx += get_se_golomb(&s->gb);
4568 my += get_se_golomb(&s->gb);
4569 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4571 val= pack16to32(mx,my);
4574 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4580 if(IS_INTER(mb_type))
4581 write_back_motion(h, mb_type);
/* --- coded block pattern (explicit unless I16x16 implies it) --- */
4583 if(!IS_INTRA16x16(mb_type)){
4584 cbp= get_ue_golomb(&s->gb);
4586 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4591 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4592 else cbp= golomb_to_inter_cbp [cbp];
4594 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4595 else cbp= golomb_to_inter_cbp_gray[cbp];
4600 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4601 if(get_bits1(&s->gb)){
4602 mb_type |= MB_TYPE_8x8DCT;
4603 h->cbp_table[mb_xy]= cbp;
4606 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: delta-QP, scan selection, luma then chroma blocks --- */
4608 if(cbp || IS_INTRA16x16(mb_type)){
4609 int i8x8, i4x4, chroma_idx;
4611 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4612 const uint8_t *scan, *scan8x8, *dc_scan;
4614 // fill_non_zero_count_cache(h);
4616 if(IS_INTERLACED(mb_type)){
4617 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4618 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4619 dc_scan= luma_dc_field_scan;
4621 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4622 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4623 dc_scan= luma_dc_zigzag_scan;
4626 dquant= get_se_golomb(&s->gb);
4628 if( dquant > 25 || dquant < -26 ){
4629 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4633 s->qscale += dquant;
4634 if(((unsigned)s->qscale) > 51){
4635 if(s->qscale<0) s->qscale+= 52;
4636 else s->qscale-= 52;
4639 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4640 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4641 if(IS_INTRA16x16(mb_type)){
4642 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4643 return -1; //FIXME continue if partitioned and other return -1 too
4646 assert((cbp&15) == 0 || (cbp&15) == 15);
4649 for(i8x8=0; i8x8<4; i8x8++){
4650 for(i4x4=0; i4x4<4; i4x4++){
4651 const int index= i4x4 + 4*i8x8;
4652 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4658 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4661 for(i8x8=0; i8x8<4; i8x8++){
4662 if(cbp & (1<<i8x8)){
4663 if(IS_8x8DCT(mb_type)){
4664 DCTELEM *buf = &h->mb[64*i8x8];
4666 for(i4x4=0; i4x4<4; i4x4++){
4667 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4668 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4671 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4672 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4674 for(i4x4=0; i4x4<4; i4x4++){
4675 const int index= i4x4 + 4*i8x8;
4677 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4683 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4684 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4690 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4691 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4697 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4698 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4699 for(i4x4=0; i4x4<4; i4x4++){
4700 const int index= 16 + 4*chroma_idx + i4x4;
4701 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4707 uint8_t * const nnz= &h->non_zero_count_cache[0];
4708 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4709 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4712 uint8_t * const nnz= &h->non_zero_count_cache[0];
4713 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4714 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4715 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4717 s->current_picture.qscale_table[mb_xy]= s->qscale;
4718 write_back_non_zero_count(h);
/* restore the MBAFF-doubled ref counts (see the <<= 1 above) */
4721 h->ref_count[0] >>= 1;
4722 h->ref_count[1] >>= 1;
/* CABAC-decodes mb_field_decoding_flag for an MBAFF pair; context (ctx) is
 * derived from whether the left and top neighbouring pairs are interlaced.
 * NOTE(review): the ctx increments inside the two if-bodies are elided here. */
4728 static int decode_cabac_field_decoding_flag(H264Context *h) {
4729 MpegEncContext * const s = &h->s;
4730 const int mb_x = s->mb_x;
4731 const int mb_y = s->mb_y & ~1;
4732 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4733 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4735 unsigned int ctx = 0;
4737 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4740 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4744 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* CABAC-decodes an intra mb_type: 0 = I_4x4, 25 = I_PCM, 1..24 = I_16x16
 * variants (built from cbp_luma, cbp_chroma and prediction-mode bins).
 * intra_slice selects the context offsets used in I slices. */
4747 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4748 uint8_t *state= &h->cabac_state[ctx_base];
4752 MpegEncContext * const s = &h->s;
4753 const int mba_xy = h->left_mb_xy[0];
4754 const int mbb_xy = h->top_mb_xy;
/* context from neighbours: non-I4x4 neighbours raise ctx (increments elided) */
4756 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4758 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4760 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4761 return 0; /* I4x4 */
4764 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4765 return 0; /* I4x4 */
4768 if( get_cabac_terminate( &h->cabac ) )
4769 return 25; /* PCM */
4771 mb_type = 1; /* I16x16 */
4772 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4773 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4774 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4775 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4776 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* CABAC-decodes mb_type in a B slice.  Context for the first bin depends on
 * whether left/top neighbours are non-direct (increments elided in this
 * listing).  Returns the B mb_type index used by b_mb_type_info. */
4780 static int decode_cabac_mb_type_b( H264Context *h ) {
4781 MpegEncContext * const s = &h->s;
4783 const int mba_xy = h->left_mb_xy[0];
4784 const int mbb_xy = h->top_mb_xy;
4787 assert(h->slice_type_nos == FF_B_TYPE);
4789 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4791 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4794 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4795 return 0; /* B_Direct_16x16 */
4797 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4798 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining B types */
4801 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4802 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4803 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4804 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4806 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4807 else if( bits == 13 ) {
/* escape to intra mb_type decoding, offset past the 23 inter B types */
4808 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4809 } else if( bits == 14 )
4810 return 11; /* B_L1_L0_8x16 */
4811 else if( bits == 15 )
4812 return 22; /* B_8x8 */
4814 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4815 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* CABAC-decodes mb_skip_flag.  Neighbour MB addresses (mba_xy left, mbb_xy
 * top) are adjusted for MBAFF/field pictures; context rises for each
 * non-skipped same-slice neighbour (increments elided in this listing).
 * B slices use a context base offset (elided ctx += presumably 13). */
4818 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4819 MpegEncContext * const s = &h->s;
4823 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4824 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4827 && h->slice_table[mba_xy] == h->slice_num
4828 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4829 mba_xy += s->mb_stride;
4831 mbb_xy = mb_xy - s->mb_stride;
4833 && h->slice_table[mbb_xy] == h->slice_num
4834 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4835 mbb_xy -= s->mb_stride;
4837 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4839 int mb_xy = h->mb_xy;
4841 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4844 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4846 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4849 if( h->slice_type_nos == FF_B_TYPE )
4851 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC-decodes one intra4x4 prediction mode: a "use predicted mode" flag
 * (state 68), else a 3-bit remaining-mode value (state 69) adjusted to skip
 * the predicted mode, mirroring rem_mode + (rem_mode >= pred) in CAVLC. */
4854 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4857 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4860 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4861 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4862 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4864 if( mode >= pred_mode )
/* CABAC-decodes intra_chroma_pred_mode (0..3) as a truncated unary code;
 * context of the first bin depends on whether the neighbours used a
 * non-zero chroma mode (ctx increments elided in this listing). */
4870 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4871 const int mba_xy = h->left_mb_xy[0];
4872 const int mbb_xy = h->top_mb_xy;
4876 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4877 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4880 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4883 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4886 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4888 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC-decodes the 4-bit luma CBP, one bin per 8x8 block; each bin's context
 * combines the already-decoded bits of this MB with the left (cbp_a) and top
 * (cbp_b) neighbours' CBPs (-1 when the neighbour is outside the slice). */
4894 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4895 int cbp_b, cbp_a, ctx, cbp = 0;
4897 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4898 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4900 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4901 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4902 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4903 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4904 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4905 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4906 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4907 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC-decodes the chroma CBP (0: none, 1: DC only, 2: DC+AC) using the
 * neighbours' chroma CBP bits (top two bits of left_cbp/top_cbp) as context. */
4910 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4914 cbp_a = (h->left_cbp>>4)&0x03;
4915 cbp_b = (h-> top_cbp>>4)&0x03;
4918 if( cbp_a > 0 ) ctx++;
4919 if( cbp_b > 0 ) ctx += 2;
4920 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4924 if( cbp_a == 2 ) ctx++;
4925 if( cbp_b == 2 ) ctx += 2;
4926 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC-decodes mb_qp_delta: unary code (states 60..), then mapped to a
 * signed delta — even counts become positive, odd negative.  The loop is
 * capped at 102 iterations to avoid an infinite loop on corrupt input. */
4928 static int decode_cabac_mb_dqp( H264Context *h) {
4929 int ctx= h->last_qscale_diff != 0;
4932 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4935 if(val > 102) //prevent infinite loop
4940 return (val + 1)>>1 ;
4942 return -((val + 1)>>1);
/* CABAC-decodes a P sub_mb_type (states 21-23); the return statements for
 * each leaf of the decision tree are elided from this listing. */
4944 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4945 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4947 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4949 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC-decodes a B sub_mb_type (states 36-39): 0 = B_Direct_8x8, then a
 * small decision tree over partition shape and reference lists. */
4953 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4955 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4956 return 0; /* B_Direct_8x8 */
4957 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4958 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4960 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4961 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4962 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4965 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4966 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC-decodes transform_size_8x8_flag; context is the number of
 * neighbouring MBs already using the 8x8 transform. */
4970 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4971 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC-decodes a reference index (unary, states 54..) for block n of the
 * given list; context derives from left/top cached refs, with direct-mode
 * neighbours neutralized in B slices.  Hard-capped at 32 to stop runaway
 * unary codes on corrupt input (error return elided in this listing). */
4974 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4975 int refa = h->ref_cache[list][scan8[n] - 1];
4976 int refb = h->ref_cache[list][scan8[n] - 8];
4980 if( h->slice_type_nos == FF_B_TYPE) {
4981 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
4983 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
4992 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
4995 if(ref >= 32 /*h->ref_list[list]*/){
/* CABAC-decodes one motion-vector-difference component l (0=x, 1=y) for
 * block n: context from the magnitude of neighbouring MVDs, a UEG3 prefix
 * (context-coded up to 9, then bypass exp-Golomb), then a bypass sign. */
5002 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5003 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5004 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5005 int ctxbase = (l == 0) ? 40 : 47;
5007 int ctx = (amvd>2) + (amvd>32);
5009 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5014 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exp-Golomb suffix; overflow guard below */
5022 while( get_cabac_bypass( &h->cabac ) ) {
5026 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5031 if( get_cabac_bypass( &h->cabac ) )
5035 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Computes the coded_block_flag context for category cat, block idx: nza/nzb
 * are the left/top neighbours' "has coefficients" state, read from the cbp
 * table for DC categories and from non_zero_count_cache otherwise; the final
 * context is offset by 4 per category. */
5038 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5044 nza = h->left_cbp&0x100;
5045 nzb = h-> top_cbp&0x100;
5047 nza = (h->left_cbp>>(6+idx))&0x01;
5048 nzb = (h-> top_cbp>>(6+idx))&0x01;
5051 assert(cat == 1 || cat == 2 || cat == 4);
5052 nza = h->non_zero_count_cache[scan8[idx] - 1];
5053 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5062 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the last_significant_coeff_flag
 * context offset used in decode_cabac_residual_internal for cat 5 blocks. */
5065 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5066 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5067 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5068 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5069 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoding (H.264 clause 9.3): coded_block_flag, significance
 * map, then coefficient levels/signs in reverse scan order.  is_dc is a
 * compile-time constant in the _dc/_nondc wrappers below so dead branches
 * fold away.  NOTE(review): this listing elides several lines (declarations,
 * else branches, closing braces); comments are hedged accordingly. */
5072 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* per-category context-base offsets into cabac_state, [MB_FIELD][cat] */
5073 static const int significant_coeff_flag_offset[2][6] = {
5074 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5075 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5077 static const int last_coeff_flag_offset[2][6] = {
5078 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5079 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5081 static const int coeff_abs_level_m1_offset[6] = {
5082 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* significance context per 8x8 scan position, frame [0] / field [1] coded */
5084 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5085 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5086 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5087 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5088 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5089 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5090 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5091 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5092 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5094 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5095 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5096 * map node ctx => cabac ctx for level=1 */
5097 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5098 /* map node ctx => cabac ctx for level>1 */
5099 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5100 static const uint8_t coeff_abs_level_transition[2][8] = {
5101 /* update node ctx after decoding a level=1 */
5102 { 1, 2, 3, 3, 4, 5, 6, 7 },
5103 /* update node ctx after decoding a level>1 */
5104 { 4, 4, 4, 4, 5, 6, 7, 7 }
5110 int coeff_count = 0;
5113 uint8_t *significant_coeff_ctx_base;
5114 uint8_t *last_coeff_ctx_base;
5115 uint8_t *abs_level_m1_ctx_base;
/* keep a local copy of the CABAC state on the stack for faster access;
 * written back before every return */
5118 #define CABAC_ON_STACK
5120 #ifdef CABAC_ON_STACK
5123 cc.range = h->cabac.range;
5124 cc.low = h->cabac.low;
5125 cc.bytestream= h->cabac.bytestream;
5127 #define CC &h->cabac
5131 /* cat: 0-> DC 16x16 n = 0
5132 * 1-> AC 16x16 n = luma4x4idx
5133 * 2-> Luma4x4 n = luma4x4idx
5134 * 3-> DC Chroma n = iCbCr
5135 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5136 * 5-> Luma8x8 n = 4 * luma8x8idx
5139 /* read coded block flag */
5140 if( is_dc || cat != 5 ) {
5141 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5143 h->non_zero_count_cache[scan8[n]] = 0;
5145 #ifdef CABAC_ON_STACK
5146 h->cabac.range = cc.range ;
5147 h->cabac.low = cc.low ;
5148 h->cabac.bytestream= cc.bytestream;
5154 significant_coeff_ctx_base = h->cabac_state
5155 + significant_coeff_flag_offset[MB_FIELD][cat];
5156 last_coeff_ctx_base = h->cabac_state
5157 + last_coeff_flag_offset[MB_FIELD][cat];
5158 abs_level_m1_ctx_base = h->cabac_state
5159 + coeff_abs_level_m1_offset[cat];
/* significance map: records scan positions of non-zero coeffs in index[] */
5161 if( !is_dc && cat == 5 ) {
5162 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5163 for(last= 0; last < coefs; last++) { \
5164 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5165 if( get_cabac( CC, sig_ctx )) { \
5166 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5167 index[coeff_count++] = last; \
5168 if( get_cabac( CC, last_ctx ) ) { \
5174 if( last == max_coeff -1 ) {\
5175 index[coeff_count++] = last;\
5177 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5178 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5179 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5181 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5183 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5185 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5188 assert(coeff_count > 0);
/* record non-zero status: cbp_table bits for DC cats, nnz cache otherwise */
5192 h->cbp_table[h->mb_xy] |= 0x100;
5194 h->cbp_table[h->mb_xy] |= 0x40 << n;
5197 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5199 assert( cat == 1 || cat == 2 || cat == 4 );
5200 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* level decoding, last coefficient first; node_ctx tracks the adaptive
 * context state machine from the tables above */
5205 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5207 int j= scantable[index[--coeff_count]];
5209 if( get_cabac( CC, ctx ) == 0 ) {
5210 node_ctx = coeff_abs_level_transition[0][node_ctx];
5212 block[j] = get_cabac_bypass_sign( CC, -1);
5214 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5218 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5219 node_ctx = coeff_abs_level_transition[1][node_ctx];
5221 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* magnitude >= 15: bypass-coded exp-Golomb escape */
5225 if( coeff_abs >= 15 ) {
5227 while( get_cabac_bypass( CC ) ) {
5233 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5239 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5241 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5244 } while( coeff_count );
5245 #ifdef CABAC_ON_STACK
5246 h->cabac.range = cc.range ;
5247 h->cabac.low = cc.low ;
5248 h->cabac.bytestream= cc.bytestream;
5253 #ifndef CONFIG_SMALL
// Normal (non-size-optimised) builds compile two specialisations of
// decode_cabac_residual_internal() so its is_dc argument is a compile-time
// constant in each.  DC variant: is_dc hard-wired to 1.
// (The matching closing brace / #else lines are elided in this excerpt.)
5254 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5255     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
// Non-DC specialisation: is_dc hard-wired to 0.  Used for every residual
// category except luma DC (cat 0) and chroma DC (cat 3) — see the
// dispatcher decode_cabac_residual() below.
5258 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5259     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
// Dispatcher for CABAC residual block decoding.  Categories 0 (Intra_16x16
// luma DC, decoded with dc_scan) and 3 (chroma DC, decoded with
// chroma_dc_scan) take the DC path; everything else takes the non-DC path.
// NOTE(review): both call styles appear below because the CONFIG_SMALL
// #ifdef/#else/#endif lines separating them are elided in this excerpt.
5263 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
// CONFIG_SMALL build: one shared implementation, is_dc computed at run time.
5265     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
// Normal build: branch to the constant-folded specialisations above.
5267     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5268     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
// Computes h->top_mb_xy and h->left_mb_xy[0], the macroblock indices of the
// top and left neighbours used for context/prediction derivation.
// NOTE(review): the conditions guarding the MBAFF and FIELD_PICTURE
// adjustments are partly elided in this excerpt.
5272 static inline void compute_mb_neighbors(H264Context *h)
5274 MpegEncContext * const s = &h->s;
5275 const int mb_xy = h->mb_xy;
// Frame-coded defaults: the mb directly above and directly to the left.
5276 h->top_mb_xy = mb_xy - s->mb_stride;
5277 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: neighbours are derived from the position of the mb *pair*
// (s->mb_y & ~1 selects the top row of the pair).
5279 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5280 const int top_pair_xy = pair_xy - s->mb_stride;
5281 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5282 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5283 const int curr_mb_field_flag = MB_FIELD;
5284 const int bottom = (s->mb_y & 1);
// Field mb whose top neighbour pair is frame-coded (or which is the bottom
// mb of its pair): step one further row up.
5286 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5287 h->top_mb_xy -= s->mb_stride;
// Left pair coded with the opposite field flag: use the pair's first
// (top) mb as the left neighbour.  (Flags are 0/1, so !a == b tests a != b.)
5289 if (!left_mb_field_flag == curr_mb_field_flag) {
5290 h->left_mb_xy[0] = pair_xy - 1;
// Field pictures: rows are interleaved, so "above" is two stride rows up.
5292 } else if (FIELD_PICTURE) {
5293 h->top_mb_xy -= s->mb_stride;
5299 * decodes a macroblock
5300 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5302 static int decode_mb_cabac(H264Context *h) {
5303 MpegEncContext * const s = &h->s;
5305 int mb_type, partition_count, cbp = 0;
5306 int dct8x8_allowed= h->pps.transform_8x8_mode;
5308 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5310 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- mb_skip_flag (inter slices only; I slices never signal skip) ---
5311 if( h->slice_type_nos != FF_I_TYPE ) {
5313 /* a skipped mb needs the aff flag from the following mb */
5314 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5315 predict_field_decoding_flag(h);
5316 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5317 skip = h->next_mb_skipped;
5319 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5320 /* read skip flags */
// MBAFF, top mb of a pair: peek at the bottom mb's skip flag, and decode
// the pair's field flag now if the bottom mb is not skipped.
5322 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5323 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5324 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5325 if(!h->next_mb_skipped)
5326 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// Skipped mb: reset per-mb state used by deblocking/context derivation.
5331 h->cbp_table[mb_xy] = 0;
5332 h->chroma_pred_mode_table[mb_xy] = 0;
5333 h->last_qscale_diff = 0;
5340 if( (s->mb_y&1) == 0 )
5342 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5345 h->prev_mb_skipped = 0;
// Locate left/top neighbour mbs for CABAC context derivation.
5347 compute_mb_neighbors(h);
// --- mb_type: B, P and I slices use different binarisations ---
5349 if( h->slice_type_nos == FF_B_TYPE ) {
5350 mb_type = decode_cabac_mb_type_b( h );
5352 partition_count= b_mb_type_info[mb_type].partition_count;
5353 mb_type= b_mb_type_info[mb_type].type;
5356 goto decode_intra_mb;
5358 } else if( h->slice_type_nos == FF_P_TYPE ) {
5359 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5361 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5362 /* P_L0_D16x16, P_8x8 */
5363 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5365 /* P_L0_D8x16, P_L0_D16x8 */
5366 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5368 partition_count= p_mb_type_info[mb_type].partition_count;
5369 mb_type= p_mb_type_info[mb_type].type;
// Intra mb inside a P slice (ctx_base 17).
5371 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5372 goto decode_intra_mb;
// I slice: intra mb type with ctx_base 3.
5375 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5376 if(h->slice_type == FF_SI_TYPE && mb_type)
5378 assert(h->slice_type_nos == FF_I_TYPE);
// Intra types carry cbp and the 16x16 prediction mode in the type table.
5380 partition_count = 0;
5381 cbp= i_mb_type_info[mb_type].cbp;
5382 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5383 mb_type= i_mb_type_info[mb_type].type;
5386 mb_type |= MB_TYPE_INTERLACED;
5388 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow; realign the CABAC engine to the byte
// boundary, copy the samples, then restart the decoder after them ---
5390 if(IS_INTRA_PCM(mb_type)) {
5393 // We assume these blocks are very rare so we do not optimize it.
5394 // FIXME The two following lines get the bitstream position in the cabac
5395 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5396 ptr= h->cabac.bytestream;
5397 if(h->cabac.low&0x1) ptr--;
5399 if(h->cabac.low&0x1FF) ptr--;
5402 // The pixels are stored in the same order as levels in h->mb array.
5403 memcpy(h->mb, ptr, 256); ptr+=256;
5405 memcpy(h->mb+128, ptr, 128); ptr+=128;
5408 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5410 // All blocks are present
5411 h->cbp_table[mb_xy] = 0x1ef;
5412 h->chroma_pred_mode_table[mb_xy] = 0;
5413 // In deblocking, the quantizer is 0
5414 s->current_picture.qscale_table[mb_xy]= 0;
5415 // All coeffs are present
5416 memset(h->non_zero_count[mb_xy], 16, 16);
5417 s->current_picture.mb_type[mb_xy]= mb_type;
5418 h->last_qscale_diff = 0;
// Double the ref counts; undone by the matching >>=1 at the end of this
// function (the guarding condition lines are elided in this excerpt).
5423 h->ref_count[0] <<= 1;
5424 h->ref_count[1] <<= 1;
// Fill neighbour caches (intra modes, refs, mvs) for this mb.
5427 fill_caches(h, mb_type, 0);
// --- Intra prediction modes ---
5429 if( IS_INTRA( mb_type ) ) {
5431 if( IS_INTRA4x4( mb_type ) ) {
// 8x8 transform: one prediction mode covers each 2x2 group of 4x4 blocks.
5432 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5433 mb_type |= MB_TYPE_8x8DCT;
5434 for( i = 0; i < 16; i+=4 ) {
5435 int pred = pred_intra_mode( h, i );
5436 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5437 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5440 for( i = 0; i < 16; i++ ) {
5441 int pred = pred_intra_mode( h, i );
5442 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5444 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5447 write_back_intra_pred_mode(h);
5448 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5450 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5451 if( h->intra16x16_pred_mode < 0 ) return -1;
5454 h->chroma_pred_mode_table[mb_xy] =
5455 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5457 pred_mode= check_intra_pred_mode( h, pred_mode );
5458 if( pred_mode < 0 ) return -1;
5459 h->chroma_pred_mode= pred_mode;
// --- 8x8-partitioned inter mb: sub_mb_type, then refs, then mvds ---
5461 } else if( partition_count == 4 ) {
5462 int i, j, sub_partition_count[4], list, ref[2][4];
5464 if( h->slice_type_nos == FF_B_TYPE ) {
5465 for( i = 0; i < 4; i++ ) {
5466 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5467 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5468 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct 8x8 sub-block: infer its motion from the co-located mb.
5470 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5471 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5472 pred_direct_motion(h, &mb_type);
5473 h->ref_cache[0][scan8[4]] =
5474 h->ref_cache[1][scan8[4]] =
5475 h->ref_cache[0][scan8[12]] =
5476 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5477 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5478 for( i = 0; i < 4; i++ )
5479 if( IS_DIRECT(h->sub_mb_type[i]) )
5480 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5484 for( i = 0; i < 4; i++ ) {
5485 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5486 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5487 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices, one per non-direct 8x8 block and list.
5491 for( list = 0; list < h->list_count; list++ ) {
5492 for( i = 0; i < 4; i++ ) {
5493 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5494 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5495 if( h->ref_count[list] > 1 ){
5496 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
// Corrupt stream: out-of-range reference index.
5497 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5498 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5506 h->ref_cache[list][ scan8[4*i]+1 ]=
5507 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5512 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition; mv = predictor + mvd.
5514 for(list=0; list<h->list_count; list++){
5516 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5517 if(IS_DIRECT(h->sub_mb_type[i])){
5518 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5522 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5523 const int sub_mb_type= h->sub_mb_type[i];
5524 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5525 for(j=0; j<sub_partition_count[i]; j++){
5528 const int index= 4*i + block_width*j;
5529 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5530 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5531 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5533 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5534 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5535 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the decoded mv/mvd over the cache cells the sub-block covers.
5537 if(IS_SUB_8X8(sub_mb_type)){
5539 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5541 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5544 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5546 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5547 }else if(IS_SUB_8X4(sub_mb_type)){
5548 mv_cache[ 1 ][0]= mx;
5549 mv_cache[ 1 ][1]= my;
5551 mvd_cache[ 1 ][0]= mx - mpx;
5552 mvd_cache[ 1 ][1]= my - mpy;
5553 }else if(IS_SUB_4X8(sub_mb_type)){
5554 mv_cache[ 8 ][0]= mx;
5555 mv_cache[ 8 ][1]= my;
5557 mvd_cache[ 8 ][0]= mx - mpx;
5558 mvd_cache[ 8 ][1]= my - mpy;
5560 mv_cache[ 0 ][0]= mx;
5561 mv_cache[ 0 ][1]= my;
5563 mvd_cache[ 0 ][0]= mx - mpx;
5564 mvd_cache[ 0 ][1]= my - mpy;
// List not used for this 8x8 block: clear its mv/mvd cache cells.
5567 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5568 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5569 p[0] = p[1] = p[8] = p[9] = 0;
5570 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- B_DIRECT_16x16: motion fully inferred, nothing to read ---
5574 } else if( IS_DIRECT(mb_type) ) {
5575 pred_direct_motion(h, &mb_type);
5576 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5577 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5578 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 partitions: refs then mvds per partition ---
5580 int list, mx, my, i, mpx, mpy;
5581 if(IS_16X16(mb_type)){
5582 for(list=0; list<h->list_count; list++){
5583 if(IS_DIR(mb_type, 0, list)){
5585 if(h->ref_count[list] > 1){
5586 ref= decode_cabac_mb_ref(h, list, 0);
5587 if(ref >= (unsigned)h->ref_count[list]){
5588 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5593 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5595 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5597 for(list=0; list<h->list_count; list++){
5598 if(IS_DIR(mb_type, 0, list)){
5599 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5601 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5602 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5603 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5605 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5606 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5608 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5611 else if(IS_16X8(mb_type)){
5612 for(list=0; list<h->list_count; list++){
5614 if(IS_DIR(mb_type, i, list)){
5616 if(h->ref_count[list] > 1){
5617 ref= decode_cabac_mb_ref( h, list, 8*i );
5618 if(ref >= (unsigned)h->ref_count[list]){
5619 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5624 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5626 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5629 for(list=0; list<h->list_count; list++){
5631 if(IS_DIR(mb_type, i, list)){
5632 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5633 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5634 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5635 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5637 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5638 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5640 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5641 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5646 assert(IS_8X16(mb_type));
5647 for(list=0; list<h->list_count; list++){
5649 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5651 if(h->ref_count[list] > 1){
5652 ref= decode_cabac_mb_ref( h, list, 4*i );
5653 if(ref >= (unsigned)h->ref_count[list]){
5654 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5659 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5661 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5664 for(list=0; list<h->list_count; list++){
5666 if(IS_DIR(mb_type, i, list)){
5667 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5668 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5669 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5671 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5672 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5673 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5675 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5676 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5683 if( IS_INTER( mb_type ) ) {
5684 h->chroma_pred_mode_table[mb_xy] = 0;
5685 write_back_motion( h, mb_type );
// --- coded_block_pattern (Intra_16x16 already carries cbp in mb_type) ---
5688 if( !IS_INTRA16x16( mb_type ) ) {
5689 cbp = decode_cabac_mb_cbp_luma( h );
5691 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5694 h->cbp_table[mb_xy] = h->cbp = cbp;
// transform_size_8x8_flag for inter mbs with luma coefficients.
5696 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5697 if( decode_cabac_mb_transform_size( h ) )
5698 mb_type |= MB_TYPE_8x8DCT;
5700 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residuals: pick scan tables, decode mb_qp_delta, then coefficients ---
5702 if( cbp || IS_INTRA16x16( mb_type ) ) {
5703 const uint8_t *scan, *scan8x8, *dc_scan;
5704 const uint32_t *qmul;
// Field (interlaced) mbs use the field scan orders; the *_q0 tables are
// the qscale==0 variants.
5707 if(IS_INTERLACED(mb_type)){
5708 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5709 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5710 dc_scan= luma_dc_field_scan;
5712 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5713 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5714 dc_scan= luma_dc_zigzag_scan;
5717 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5718 if( dqp == INT_MIN ){
5719 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// Wrap qscale back into the legal 0..51 range (mod-52 arithmetic).
5723 if(((unsigned)s->qscale) > 51){
5724 if(s->qscale<0) s->qscale+= 52;
5725 else s->qscale-= 52;
5727 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5728 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
// Intra_16x16: one luma DC block (cat 0), then 15-coeff AC blocks (cat 1).
5730 if( IS_INTRA16x16( mb_type ) ) {
5732 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5733 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5736 qmul = h->dequant4_coeff[0][s->qscale];
5737 for( i = 0; i < 16; i++ ) {
5738 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5739 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5742 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other mb types: per-8x8 luma blocks, either one 8x8 transform block
// (cat 5) or four 4x4 blocks (cat 2), gated by the cbp bits.
5746 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5747 if( cbp & (1<<i8x8) ) {
5748 if( IS_8x8DCT(mb_type) ) {
5749 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5750 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5752 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5753 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5754 const int index = 4*i8x8 + i4x4;
5755 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5757 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5758 //STOP_TIMER("decode_residual")
5762 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5763 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (cat 3) and chroma AC (cat 4) blocks, one set per component.
5770 for( c = 0; c < 2; c++ ) {
5771 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5772 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5778 for( c = 0; c < 2; c++ ) {
5779 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5780 for( i = 0; i < 4; i++ ) {
5781 const int index = 16 + 4 * c + i;
5782 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5783 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5787 uint8_t * const nnz= &h->non_zero_count_cache[0];
5788 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5789 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No coefficients at all: clear the whole nnz cache.
5792 uint8_t * const nnz= &h->non_zero_count_cache[0];
5793 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5794 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5795 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5796 h->last_qscale_diff = 0;
5799 s->current_picture.qscale_table[mb_xy]= s->qscale;
5800 write_back_non_zero_count(h);
// Undo the ref_count doubling performed after the I_PCM block above.
5803 h->ref_count[0] >>= 1;
5804 h->ref_count[1] >>= 1;
// Deblocks one vertical luma edge via the dsp h_loop_filter entry points.
// The +52 bias on the tables allows negative qp+offset indices.
// NOTE(review): the bS[0]<4 test, the tc[] declaration and the else that
// separates the normal and _intra dsp calls are elided in this excerpt.
5811 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5812 const int index_a = qp + h->slice_alpha_c0_offset;
5813 const int alpha = (alpha_table+52)[index_a];
5814 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// One tc0 clipping threshold per 4-pixel group, selected by its bS value.
5818 tc[0] = (tc0_table+52)[index_a][bS[0]];
5819 tc[1] = (tc0_table+52)[index_a][bS[1]];
5820 tc[2] = (tc0_table+52)[index_a][bS[2]];
5821 tc[3] = (tc0_table+52)[index_a][bS[3]];
5822 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5824 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
// Deblocks one vertical chroma edge.  Same structure as filter_mb_edgev,
// but the chroma dsp routines take tc0+1 thresholds.
// NOTE(review): the bS<4 branch framing and tc[] declaration are elided here.
5827 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5828 const int index_a = qp + h->slice_alpha_c0_offset;
5829 const int alpha = (alpha_table+52)[index_a];
5830 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5834 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5835 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5836 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5837 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5838 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5840 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Scalar per-row deblocking of one vertical luma edge for MBAFF mb pairs,
// where each of the 16 rows can belong to a different field and so needs
// its own bS entry (8 of them) and QP (qp[0]/qp[1]).
5844 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5846 for( i = 0; i < 16; i++, pix += stride) {
// Map the row index to one of the 8 boundary-strength entries.
5852 int bS_index = (i >> 1);
5855 bS_index |= (i & 1);
// bS==0: nothing to filter on this row.
5858 if( bS[bS_index] == 0 ) {
// Field mbs: rows 0-7 vs 8-15 select the QP; frame mbs alternate per row.
5862 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5863 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5864 alpha = (alpha_table+52)[index_a];
5865 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// bS 1..3: normal tc0-clipped filtering of p1/p0/q0/q1.
5867 if( bS[bS_index] < 4 ) {
5868 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5869 const int p0 = pix[-1];
5870 const int p1 = pix[-2];
5871 const int p2 = pix[-3];
5872 const int q0 = pix[0];
5873 const int q1 = pix[1];
5874 const int q2 = pix[2];
5876 if( FFABS( p0 - q0 ) < alpha &&
5877 FFABS( p1 - p0 ) < beta &&
5878 FFABS( q1 - q0 ) < beta ) {
5882 if( FFABS( p2 - p0 ) < beta ) {
5883 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5886 if( FFABS( q2 - q0 ) < beta ) {
5887 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5891 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5892 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5893 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5894 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// bS==4 (intra edge): strong filtering, up to p2/q2 when the extra
// |p0-q0| < (alpha>>2)+2 condition holds.
5897 const int p0 = pix[-1];
5898 const int p1 = pix[-2];
5899 const int p2 = pix[-3];
5901 const int q0 = pix[0];
5902 const int q1 = pix[1];
5903 const int q2 = pix[2];
5905 if( FFABS( p0 - q0 ) < alpha &&
5906 FFABS( p1 - p0 ) < beta &&
5907 FFABS( q1 - q0 ) < beta ) {
5909 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5910 if( FFABS( p2 - p0 ) < beta)
5912 const int p3 = pix[-4];
5914 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5915 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5916 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5919 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5921 if( FFABS( q2 - q0 ) < beta)
5923 const int q3 = pix[3];
5925 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5926 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5927 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5930 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback when the extra strong-filter condition fails.
5934 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5935 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5937 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Scalar per-row deblocking of one vertical chroma edge for MBAFF mb pairs
// (8 chroma rows).  Chroma only ever touches p0/q0, with tc0+1 clipping.
5942 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5944 for( i = 0; i < 8; i++, pix += stride) {
5952 if( bS[bS_index] == 0 ) {
// Field mbs: rows 0-3 vs 4-7 select the QP; frame mbs alternate per row.
5956 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5957 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5958 alpha = (alpha_table+52)[index_a];
5959 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// bS 1..3: tc-clipped delta applied to p0/q0.
5961 if( bS[bS_index] < 4 ) {
5962 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5963 const int p0 = pix[-1];
5964 const int p1 = pix[-2];
5965 const int q0 = pix[0];
5966 const int q1 = pix[1];
5968 if( FFABS( p0 - q0 ) < alpha &&
5969 FFABS( p1 - p0 ) < beta &&
5970 FFABS( q1 - q0 ) < beta ) {
5971 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5973 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5974 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5975 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// bS==4: strong chroma filter (fixed averaging, no clipping).
5978 const int p0 = pix[-1];
5979 const int p1 = pix[-2];
5980 const int q0 = pix[0];
5981 const int q1 = pix[1];
5983 if( FFABS( p0 - q0 ) < alpha &&
5984 FFABS( p1 - p0 ) < beta &&
5985 FFABS( q1 - q0 ) < beta ) {
5987 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
5988 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
5989 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Deblocks one horizontal luma edge via the dsp v_loop_filter entry points.
// NOTE(review): the bS<4 branch framing and tc[] declaration are elided in
// this excerpt (cf. filter_mb_edgev).
5995 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5996 const int index_a = qp + h->slice_alpha_c0_offset;
5997 const int alpha = (alpha_table+52)[index_a];
5998 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6002 tc[0] = (tc0_table+52)[index_a][bS[0]];
6003 tc[1] = (tc0_table+52)[index_a][bS[1]];
6004 tc[2] = (tc0_table+52)[index_a][bS[2]];
6005 tc[3] = (tc0_table+52)[index_a][bS[3]];
6006 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6008 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
// Deblocks one horizontal chroma edge (tc0+1 thresholds, as for edgecv).
// NOTE(review): the bS<4 branch framing and tc[] declaration are elided here.
6012 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6013 const int index_a = qp + h->slice_alpha_c0_offset;
6014 const int alpha = (alpha_table+52)[index_a];
6015 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6019 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6020 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6021 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6022 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6023 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6025 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Fast-path deblocking for one macroblock.  Falls back to the full
// filter_mb() for picture-border mbs, MBAFF and the other unsupported
// cases checked below; otherwise computes bS values cheaply (constant
// tables for intra, dsp.h264_loop_filter_strength for inter) and applies
// the edge filters.  The function tail is elided in this excerpt.
6029 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6030 MpegEncContext * const s = &h->s;
6031 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6033 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
// Cases the fast path cannot handle: delegate to the generic filter.
6037 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6038 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6039 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6040 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6041 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6044 assert(!FRAME_MBAFF);
// Average QPs across the left (qp0) and top (qp1) edges, luma and chroma.
6046 mb_type = s->current_picture.mb_type[mb_xy];
6047 qp = s->current_picture.qscale_table[mb_xy];
6048 qp0 = s->current_picture.qscale_table[mb_xy-1];
6049 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6050 qpc = get_chroma_qp( h, 0, qp );
6051 qpc0 = get_chroma_qp( h, 0, qp0 );
6052 qpc1 = get_chroma_qp( h, 0, qp1 );
6053 qp0 = (qp + qp0 + 1) >> 1;
6054 qp1 = (qp + qp1 + 1) >> 1;
6055 qpc0 = (qpc + qpc0 + 1) >> 1;
6056 qpc1 = (qpc + qpc1 + 1) >> 1;
// All QPs below the threshold: alpha/beta are 0 everywhere, skip the mb.
6057 qp_thresh = 15 - h->slice_alpha_c0_offset;
6058 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6059 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra mb: fixed strengths (4 on mb edges, 3 inside; 3 on the top edge of
// field pictures).
6062 if( IS_INTRA(mb_type) ) {
6063 int16_t bS4[4] = {4,4,4,4};
6064 int16_t bS3[4] = {3,3,3,3};
6065 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
// 8x8 transform: only every second internal luma edge exists.
6066 if( IS_8x8DCT(mb_type) ) {
6067 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6068 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6069 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6070 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6072 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6073 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6074 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6075 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6076 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6077 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6078 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6079 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
// Chroma has only the mb edge and the middle edge.
6081 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6082 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6083 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6084 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6085 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6086 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6087 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6088 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Inter mb: derive bS per edge with the dsp helper, then fix up the mb
// edges when a neighbour is intra.
6091 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6092 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
// 8x8 transform with all luma 8x8 blocks coded: internal edges are all bS=2.
6094 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6096 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6098 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6099 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6100 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6101 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6103 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6104 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6105 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6106 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6108 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6109 bSv[0][0] = 0x0004000400040004ULL;
6110 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6111 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// Apply the filters; dir 0 = vertical edges, dir 1 = horizontal edges.
6113 #define FILTER(hv,dir,edge)\
6114 if(bSv[dir][edge]) {\
6115 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6117 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6118 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6124 } else if( IS_8x8DCT(mb_type) ) {
6144 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6145 MpegEncContext * const s = &h->s;
6147 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6148 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6149 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6150 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6151 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6153 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6154 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6155 // how often to recheck mv-based bS when iterating between edges
6156 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6157 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6158 // how often to recheck mv-based bS when iterating along each edge
6159 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6161 if (first_vertical_edge_done) {
6165 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6168 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6169 && !IS_INTERLACED(mb_type)
6170 && IS_INTERLACED(mbm_type)
6172 // This is a special case in the norm where the filtering must
6173 // be done twice (one each of the field) even if we are in a
6174 // frame macroblock.
6176 static const int nnz_idx[4] = {4,5,6,3};
6177 unsigned int tmp_linesize = 2 * linesize;
6178 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6179 int mbn_xy = mb_xy - 2 * s->mb_stride;
6184 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6185 if( IS_INTRA(mb_type) ||
6186 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6187 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6189 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6190 for( i = 0; i < 4; i++ ) {
6191 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6192 mbn_nnz[nnz_idx[i]] != 0 )
6198 // Do not use s->qscale as luma quantizer because it has not the same
6199 // value in IPCM macroblocks.
6200 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6201 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6202 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6203 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6204 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6205 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6206 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6207 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6214 for( edge = start; edge < edges; edge++ ) {
6215 /* mbn_xy: neighbor macroblock */
6216 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6217 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6218 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6222 if( (edge&1) && IS_8x8DCT(mb_type) )
6225 if( IS_INTRA(mb_type) ||
6226 IS_INTRA(mbn_type) ) {
6229 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6230 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6239 bS[0] = bS[1] = bS[2] = bS[3] = value;
6244 if( edge & mask_edge ) {
6245 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6248 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6249 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6252 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6253 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6254 int bn_idx= b_idx - (dir ? 8:1);
6257 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6258 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6259 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6260 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6263 if(h->slice_type_nos == FF_B_TYPE && v){
6265 for( l = 0; !v && l < 2; l++ ) {
6267 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6268 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6269 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6273 bS[0] = bS[1] = bS[2] = bS[3] = v;
6279 for( i = 0; i < 4; i++ ) {
6280 int x = dir == 0 ? edge : i;
6281 int y = dir == 0 ? i : edge;
6282 int b_idx= 8 + 4 + x + 8*y;
6283 int bn_idx= b_idx - (dir ? 8:1);
6285 if( h->non_zero_count_cache[b_idx] |
6286 h->non_zero_count_cache[bn_idx] ) {
6292 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6293 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6294 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6295 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6301 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6303 for( l = 0; l < 2; l++ ) {
6305 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6306 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6307 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6316 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6321 // Do not use s->qscale as luma quantizer because it has not the same
6322 // value in IPCM macroblocks.
6323 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6324 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6325 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6326 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6328 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6329 if( (edge&1) == 0 ) {
6330 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6331 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6332 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6333 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6336 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6337 if( (edge&1) == 0 ) {
6338 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6339 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6340 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6341 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Apply the H.264 in-loop deblocking filter to one macroblock (luma plus
 * both chroma planes).  Handles the early skip for low-QP macroblocks, the
 * CAVLC 8x8-DCT non-zero-count fixup, the special MBAFF first vertical
 * edge (8 boundary strengths, 2 QP pairs), then delegates the remaining
 * edges to filter_mb_dir() for each direction.
 * NOTE(review): this listing is non-contiguous (original line numbers skip),
 * so some statements/braces of this function are not visible here. */
6347 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6348 MpegEncContext * const s = &h->s;
6349 const int mb_xy= mb_x + mb_y*s->mb_stride;
6350 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use the tighter vertical-MV threshold (2 instead of 4) */
6351 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6352 int first_vertical_edge_done = 0;
6355 //for sufficiently low qp, filtering wouldn't do anything
6356 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6358 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6359 int qp = s->current_picture.qscale_table[mb_xy];
/* skip only if this MB and the averaged QP with left/top neighbors are all
 * at or below the threshold */
6361 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6362 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6367 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6368 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6369 int top_type, left_type[2];
6370 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6371 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6372 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the top-row NNZ cache entries from the neighbor's CBP bits */
6374 if(IS_8x8DCT(top_type)){
6375 h->non_zero_count_cache[4+8*0]=
6376 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6377 h->non_zero_count_cache[6+8*0]=
6378 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6380 if(IS_8x8DCT(left_type[0])){
6381 h->non_zero_count_cache[3+8*1]=
6382 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6384 if(IS_8x8DCT(left_type[1])){
6385 h->non_zero_count_cache[3+8*3]=
6386 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* for the current MB, spread each 8x8 CBP bit across its four 4x4 cells */
6389 if(IS_8x8DCT(mb_type)){
6390 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6391 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6393 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6394 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6396 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6397 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6399 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6400 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6405 // left mb is in picture
6406 && h->slice_table[mb_xy-1] != 0xFFFF
6407 // and current and left pair do not have the same interlaced type
6408 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6409 // and left mb is in the same slice if deblocking_filter == 2
6410 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6411 /* First vertical edge is different in MBAFF frames
6412 * There are 8 different bS to compute and 2 different Qp
6414 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6415 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6420 int mb_qp, mbn0_qp, mbn1_qp;
6422 first_vertical_edge_done = 1;
6424 if( IS_INTRA(mb_type) )
6425 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6427 for( i = 0; i < 8; i++ ) {
/* field MBs pair with one left neighbor per half; frame MBs alternate */
6428 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6430 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6432 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6433 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6434 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6436 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average QP of the current MB with each left-pair neighbor, for luma
 * (qp), Cb (bqp) and Cr (rqp) */
6443 mb_qp = s->current_picture.qscale_table[mb_xy];
6444 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6445 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6446 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6447 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6448 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6449 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6450 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6451 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6452 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6453 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6454 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6455 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6458 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6459 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6460 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6461 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6462 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* remaining edges: one pass per direction (0 = vertical, 1 = horizontal) */
6466 for( dir = 0; dir < 2; dir++ )
6467 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6469 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6470 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode all macroblocks of one slice (thread entry point).
 * Selects the CABAC or CAVLC macroblock loop based on pps.cabac,
 * reports decoded regions to the error-resilience layer via
 * ff_er_add_slice(), and draws completed rows with ff_draw_horiz_band().
 * Fix: removed stray mis-encoded '?' characters that corrupted the
 * bitstream-exhaustion check (compare the identical expression two lines
 * below and at the CAVLC branch above).
 * NOTE(review): this listing is non-contiguous (original line numbers
 * skip), so some statements/braces of this function are not visible here. */
6474 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6475 H264Context *h = *(void**)arg;
6476 MpegEncContext * const s = &h->s;
/* with data partitioning only AC flags are reported per partition */
6477 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6481 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6482 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6484 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned */
6488 align_get_bits( &s->gb );
6491 ff_init_cabac_states( &h->cabac);
6492 ff_init_cabac_decoder( &h->cabac,
6493 s->gb.buffer + get_bits_count(&s->gb)/8,
6494 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6495 /* calculate pre-state */
6496 for( i= 0; i < 460; i++ ) {
6498 if( h->slice_type_nos == FF_I_TYPE )
6499 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6501 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte as the CABAC engine expects */
6504 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6506 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6511 int ret = decode_mb_cabac(h);
6513 //STOP_TIMER("decode_mb_cabac")
6515 if(ret>=0) hl_decode_mb(h);
6517 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6520 ret = decode_mb_cabac(h);
6522 if(ret>=0) hl_decode_mb(h);
6525 eos = get_cabac_terminate( &h->cabac );
/* small overread past the end is tolerated; beyond that it is an error */
6527 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6528 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6529 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6533 if( ++s->mb_x >= s->mb_width ) {
6535 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6537 if(FIELD_OR_MBAFF_PICTURE) {
6542 if( eos || s->mb_y >= s->mb_height ) {
6543 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6544 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* ---- CAVLC macroblock loop ---- */
6551 int ret = decode_mb_cavlc(h);
6553 if(ret>=0) hl_decode_mb(h);
6555 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6557 ret = decode_mb_cavlc(h);
6559 if(ret>=0) hl_decode_mb(h);
6564 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6565 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6570 if(++s->mb_x >= s->mb_width){
6572 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6574 if(FIELD_OR_MBAFF_PICTURE) {
6577 if(s->mb_y >= s->mb_height){
6578 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* slice must end exactly at the bit boundary, otherwise report error */
6580 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6581 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6585 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* a pending skip run may legitimately extend past the last coded bit */
6592 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6593 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6594 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6595 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6599 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* ---- disabled/simple reference loop (kept for reference) ---- */
6608 for(;s->mb_y < s->mb_height; s->mb_y++){
6609 for(;s->mb_x < s->mb_width; s->mb_x++){
6610 int ret= decode_mb(h);
6615 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6616 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6621 if(++s->mb_x >= s->mb_width){
6623 if(++s->mb_y >= s->mb_height){
6624 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6625 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6629 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6636 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6637 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6638 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6642 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6649 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6652 return -1; //not reached
/* Parse a picture-timing SEI message: skips the HRD CPB/DPB delays when
 * HRD parameters are present, reads pic_struct, then skips each
 * clock-timestamp block (full or hierarchical seconds/minutes/hours form)
 * plus the optional time_offset.  All timestamp fields are discarded;
 * only sei_pic_struct is stored. */
6655 static int decode_picture_timing(H264Context *h){
6656 MpegEncContext * const s = &h->s;
6657 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6658 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6659 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6661 if(h->sps.pic_struct_present_flag){
6662 unsigned int i, num_clock_ts;
6663 h->sei_pic_struct = get_bits(&s->gb, 4);
/* values above FRAME_TRIPLING are reserved by the spec */
6665 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6668 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6670 for (i = 0 ; i < num_clock_ts ; i++){
6671 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6672 unsigned int full_timestamp_flag;
6673 skip_bits(&s->gb, 2); /* ct_type */
6674 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6675 skip_bits(&s->gb, 5); /* counting_type */
6676 full_timestamp_flag = get_bits(&s->gb, 1);
6677 skip_bits(&s->gb, 1); /* discontinuity_flag */
6678 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6679 skip_bits(&s->gb, 8); /* n_frames */
6680 if(full_timestamp_flag){
6681 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6682 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6683 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* without full_timestamp_flag each component is individually optional */
6685 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6686 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6687 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6688 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6689 if(get_bits(&s->gb, 1)) /* hours_flag */
6690 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6694 if(h->sps.time_offset_length > 0)
6695 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an "unregistered user data" SEI payload.  The first 16 bytes are
 * the UUID; the text after it is scanned for an x264 version banner so
 * the decoder can enable build-specific bug workarounds (h->x264_build).
 * Payload bytes beyond the local buffer are skipped. */
6702 static int decode_unregistered_user_data(H264Context *h, int size){
6703 MpegEncContext * const s = &h->s;
/* 16-byte UUID + up to 256 bytes of text */
6704 uint8_t user_data[16+256];
6710 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6711 user_data[i]= get_bits(&s->gb, 8);
/* look for the x264 encoder banner after the UUID to capture the build
 * number for bug workarounds */
6715 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6716 if(e==1 && build>=0)
6717 h->x264_build= build;
6719 if(s->avctx->debug & FF_DEBUG_BUGS)
6720 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any remainder of the payload that did not fit the buffer */
6723 skip_bits(&s->gb, 8);
/* Iterate over all SEI messages in the current NAL unit.  Type and size
 * use the spec's byte-extension coding: 0xFF bytes accumulate into the
 * value until a non-0xFF byte terminates it.  Known payload types are
 * dispatched; unknown types are skipped by size. */
6728 static int decode_sei(H264Context *h){
6729 MpegEncContext * const s = &h->s;
6731 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* last_payload_type_byte: 0xFF means "add 255 and continue" */
6736 type+= show_bits(&s->gb, 8);
6737 }while(get_bits(&s->gb, 8) == 255);
/* last_payload_size_byte: same extension scheme as the type */
6741 size+= show_bits(&s->gb, 8);
6742 }while(get_bits(&s->gb, 8) == 255);
6745 case 1: // Picture timing SEI
6746 if(decode_picture_timing(h) < 0)
6750 if(decode_unregistered_user_data(h, size) < 0)
/* unrecognized payload type: skip the whole payload */
6754 skip_bits(&s->gb, 8*size);
6757 //FIXME check bits here
6758 align_get_bits(&s->gb);
/* Parse an hrd_parameters() syntax structure from the VUI.  Most fields
 * are read and discarded; only the delay/offset field lengths needed
 * later by picture-timing SEI parsing are stored into the SPS. */
6764 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6765 MpegEncContext * const s = &h->s;
6767 cpb_count = get_ue_golomb(&s->gb) + 1;
/* spec limit: cpb_cnt_minus1 is 0..31 */
6769 if(cpb_count > 32U){
6770 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6774 get_bits(&s->gb, 4); /* bit_rate_scale */
6775 get_bits(&s->gb, 4); /* cpb_size_scale */
6776 for(i=0; i<cpb_count; i++){
6777 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6778 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6779 get_bits1(&s->gb); /* cbr_flag */
6781 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* these lengths are consumed later by decode_picture_timing() */
6782 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6783 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6784 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse vui_parameters(): sample aspect ratio, overscan/video-signal
 * descriptors (skipped), timing info, NAL/VCL HRD parameters,
 * pic_struct flag and bitstream restrictions.  Stores only the fields
 * the decoder uses (sar, timing, delay lengths, num_reorder_frames). */
6788 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6789 MpegEncContext * const s = &h->s;
6790 int aspect_ratio_info_present_flag;
6791 unsigned int aspect_ratio_idc;
6793 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6795 if( aspect_ratio_info_present_flag ) {
6796 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16 bit num/den pair */
6797 if( aspect_ratio_idc == EXTENDED_SAR ) {
6798 sps->sar.num= get_bits(&s->gb, 16);
6799 sps->sar.den= get_bits(&s->gb, 16);
6800 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6801 sps->sar= pixel_aspect[aspect_ratio_idc];
6803 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6810 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6812 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6813 get_bits1(&s->gb); /* overscan_appropriate_flag */
6816 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6817 get_bits(&s->gb, 3); /* video_format */
6818 get_bits1(&s->gb); /* video_full_range_flag */
6819 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6820 get_bits(&s->gb, 8); /* colour_primaries */
6821 get_bits(&s->gb, 8); /* transfer_characteristics */
6822 get_bits(&s->gb, 8); /* matrix_coefficients */
6826 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6827 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6828 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6831 sps->timing_info_present_flag = get_bits1(&s->gb);
6832 if(sps->timing_info_present_flag){
6833 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6834 sps->time_scale = get_bits_long(&s->gb, 32);
6835 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* hrd_parameters() may occur twice: once for NAL, once for VCL */
6838 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6839 if(sps->nal_hrd_parameters_present_flag)
6840 if(decode_hrd_parameters(h, sps) < 0)
6842 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6843 if(sps->vcl_hrd_parameters_present_flag)
6844 if(decode_hrd_parameters(h, sps) < 0)
6846 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6847 get_bits1(&s->gb); /* low_delay_hrd_flag */
6848 sps->pic_struct_present_flag = get_bits1(&s->gb);
6850 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6851 if(sps->bitstream_restriction_flag){
6852 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6853 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6854 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6855 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6856 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6857 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6858 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6860 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6861 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Decode one scaling list (4x4 or 8x8) from the bitstream into `factors`.
 * If the list is absent, copies `fallback_list`; if the first delta makes
 * next==0, copies the JVT default list; otherwise applies delta coding in
 * zigzag order, repeating the last value once a delta of 0 is reached. */
6869 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6870 const uint8_t *jvt_list, const uint8_t *fallback_list){
6871 MpegEncContext * const s = &h->s;
6872 int i, last = 8, next = 8;
6873 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6874 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6875 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6877 for(i=0;i<size;i++){
/* delta_scale is signed; values wrap modulo 256 */
6879 next = (last + get_se_golomb(&s->gb)) & 0xff;
6880 if(!i && !next){ /* matrix not written, we use the preset one */
6881 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the rest of the list" */
6884 last = factors[scan[i]] = next ? next : last;
/* Decode the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per spec: the first intra/inter lists fall back to the
 * SPS lists (when decoding a PPS after an SPS that carried matrices) or
 * to the flat defaults; the remaining lists fall back to the previously
 * decoded list of the same prediction type. */
6888 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6889 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6890 MpegEncContext * const s = &h->s;
6891 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6892 const uint8_t *fallback[4] = {
6893 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6894 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6895 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6896 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
6898 if(get_bits1(&s->gb)){
6899 sps->scaling_matrix_present |= is_sps;
6900 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6901 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6902 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6903 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6904 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6905 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS or when the PPS enables 8x8 transforms */
6906 if(is_sps || pps->transform_8x8_mode){
6907 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6908 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL unit into a freshly allocated SPS,
 * validate ranges (sps_id, poc_cycle_length, reference count, picture
 * dimensions, cropping) and store it in h->sps_buffers[sps_id], replacing
 * any previous SPS with the same id.
 * NOTE(review): this listing is non-contiguous (original line numbers
 * skip), so some statements (e.g. error returns) are not visible here. */
6913 static inline int decode_seq_parameter_set(H264Context *h){
6914 MpegEncContext * const s = &h->s;
6915 int profile_idc, level_idc;
6916 unsigned int sps_id;
6920 profile_idc= get_bits(&s->gb, 8);
6921 get_bits1(&s->gb); //constraint_set0_flag
6922 get_bits1(&s->gb); //constraint_set1_flag
6923 get_bits1(&s->gb); //constraint_set2_flag
6924 get_bits1(&s->gb); //constraint_set3_flag
6925 get_bits(&s->gb, 4); // reserved
6926 level_idc= get_bits(&s->gb, 8);
6927 sps_id= get_ue_golomb(&s->gb);
6929 if(sps_id >= MAX_SPS_COUNT) {
6930 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6933 sps= av_mallocz(sizeof(SPS));
6937 sps->profile_idc= profile_idc;
6938 sps->level_idc= level_idc;
/* default: flat scaling matrices (all 16) until the bitstream says otherwise */
6940 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6941 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6942 sps->scaling_matrix_present = 0;
6944 if(sps->profile_idc >= 100){ //high profile
6945 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6946 if(sps->chroma_format_idc == 3)
6947 get_bits1(&s->gb); //residual_color_transform_flag
6948 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6949 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6950 sps->transform_bypass = get_bits1(&s->gb);
6951 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are always 4:2:0 */
6953 sps->chroma_format_idc= 1;
6956 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6957 sps->poc_type= get_ue_golomb(&s->gb);
6959 if(sps->poc_type == 0){ //FIXME #define
6960 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6961 } else if(sps->poc_type == 1){//FIXME #define
6962 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6963 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6964 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6965 sps->poc_cycle_length = get_ue_golomb(&s->gb);
6967 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6968 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
6972 for(i=0; i<sps->poc_cycle_length; i++)
6973 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6974 }else if(sps->poc_type != 2){
6975 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6979 sps->ref_frame_count= get_ue_golomb(&s->gb);
6980 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
6981 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
6984 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
6985 sps->mb_width = get_ue_golomb(&s->gb) + 1;
6986 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard against integer overflow before multiplying by 16 */
6987 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
6988 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
6989 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
6993 sps->frame_mbs_only_flag= get_bits1(&s->gb);
6994 if(!sps->frame_mbs_only_flag)
6995 sps->mb_aff= get_bits1(&s->gb);
6999 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7001 #ifndef ALLOW_INTERLACE
7003 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7005 sps->crop= get_bits1(&s->gb);
7007 sps->crop_left = get_ue_golomb(&s->gb);
7008 sps->crop_right = get_ue_golomb(&s->gb);
7009 sps->crop_top = get_ue_golomb(&s->gb);
7010 sps->crop_bottom= get_ue_golomb(&s->gb);
7011 if(sps->crop_left || sps->crop_top){
7012 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7014 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7015 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7021 sps->crop_bottom= 0;
7024 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7025 if( sps->vui_parameters_present_flag )
7026 decode_vui_parameters(h, sps);
7028 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7029 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7030 sps_id, sps->profile_idc, sps->level_idc,
7032 sps->ref_frame_count,
7033 sps->mb_width, sps->mb_height,
7034 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7035 sps->direct_8x8_inference_flag ? "8B8" : "",
7036 sps->crop_left, sps->crop_right,
7037 sps->crop_top, sps->crop_bottom,
7038 sps->vui_parameters_present_flag ? "VUI" : "",
7039 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previously stored SPS with the same id */
7042 av_free(h->sps_buffers[sps_id]);
7043 h->sps_buffers[sps_id]= sps;
/* Precompute the luma-QP -> chroma-QP lookup table for chroma plane `t`
 * (0 = Cb, 1 = Cr), offset by the PPS chroma_qp_index_offset `index`.
 * The offset sum is clamped to the valid QP range 0..51 before indexing
 * the spec's chroma_qp mapping table. */
7051 build_qp_table(PPS *pps, int t, int index)
7054 for(i = 0; i < 52; i++)
7055 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL unit into a freshly allocated PPS,
 * validate ids and reference counts, inherit scaling matrices from the
 * referenced SPS, build the chroma-QP tables, and store the result in
 * h->pps_buffers[pps_id].  FMO slice-group map types are not supported
 * (the syntax is documented in the embedded spec table below).
 * NOTE(review): this listing is non-contiguous (original line numbers
 * skip), so some statements (e.g. error returns) are not visible here. */
7058 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7059 MpegEncContext * const s = &h->s;
7060 unsigned int pps_id= get_ue_golomb(&s->gb);
7063 if(pps_id >= MAX_PPS_COUNT) {
7064 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7068 pps= av_mallocz(sizeof(PPS));
7071 pps->sps_id= get_ue_golomb(&s->gb);
/* the referenced SPS must already have been decoded */
7072 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7073 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7077 pps->cabac= get_bits1(&s->gb);
7078 pps->pic_order_present= get_bits1(&s->gb);
7079 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7080 if(pps->slice_group_count > 1 ){
7081 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7082 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7083 switch(pps->mb_slice_group_map_type){
7086 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7087 | run_length[ i ] |1 |ue(v) |
7092 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7094 | top_left_mb[ i ] |1 |ue(v) |
7095 | bottom_right_mb[ i ] |1 |ue(v) |
7103 | slice_group_change_direction_flag |1 |u(1) |
7104 | slice_group_change_rate_minus1 |1 |ue(v) |
7109 | slice_group_id_cnt_minus1 |1 |ue(v) |
7110 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7112 | slice_group_id[ i ] |1 |u(v) |
7117 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7118 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7119 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7120 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7124 pps->weighted_pred= get_bits1(&s->gb);
7125 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7126 pps->init_qp= get_se_golomb(&s->gb) + 26;
7127 pps->init_qs= get_se_golomb(&s->gb) + 26;
7128 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7129 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7130 pps->constrained_intra_pred= get_bits1(&s->gb);
7131 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7133 pps->transform_8x8_mode= 0;
7134 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7135 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7136 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* optional trailing fields (present only if bits remain in the RBSP) */
7138 if(get_bits_count(&s->gb) < bit_length){
7139 pps->transform_8x8_mode= get_bits1(&s->gb);
7140 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7141 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7143 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7146 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7147 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7148 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7149 h->pps.chroma_qp_diff= 1;
7151 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7152 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7153 pps_id, pps->sps_id,
7154 pps->cabac ? "CABAC" : "CAVLC",
7155 pps->slice_group_count,
7156 pps->ref_count[0], pps->ref_count[1],
7157 pps->weighted_pred ? "weighted" : "",
7158 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7159 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7160 pps->constrained_intra_pred ? "CONSTR" : "",
7161 pps->redundant_pic_cnt_present ? "REDU" : "",
7162 pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previously stored PPS with the same id */
7166 av_free(h->pps_buffers[pps_id]);
7167 h->pps_buffers[pps_id]= pps;
7175 * Call decode_slice() for each context.
7177 * @param h h264 master context
7178 * @param context_count number of contexts to execute
7180 static void execute_decode_slices(H264Context *h, int context_count){
7181 MpegEncContext * const s = &h->s;
7182 AVCodecContext * const avctx= s->avctx;
/* single slice: run inline, no thread dispatch needed */
7186 if(context_count == 1) {
7187 decode_slice(avctx, &h);
/* reset per-thread error state before parallel execution */
7189 for(i = 1; i < context_count; i++) {
7190 hx = h->thread_context[i];
7191 hx->s.error_recognition = avctx->error_recognition;
7192 hx->s.error_count = 0;
7195 avctx->execute(avctx, (void *)decode_slice,
7196 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7198 /* pull back stuff from slices to master context */
7199 hx = h->thread_context[context_count - 1];
7200 s->mb_x = hx->s.mb_x;
7201 s->mb_y = hx->s.mb_y;
7202 s->dropable = hx->s.dropable;
7203 s->picture_structure = hx->s.picture_structure;
/* aggregate error counts from all worker contexts */
7204 for(i = 1; i < context_count; i++)
7205 h->s.error_count += h->thread_context[i]->s.error_count;
7210 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7211 MpegEncContext * const s = &h->s;
7212 AVCodecContext * const avctx= s->avctx;
7214 H264Context *hx; ///< thread context
7215 int context_count = 0;
7217 h->max_contexts = avctx->thread_count;
7220 for(i=0; i<50; i++){
7221 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7224 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7225 h->current_slice = 0;
7226 if (!s->first_field)
7227 s->current_picture_ptr= NULL;
7239 if(buf_index >= buf_size) break;
7241 for(i = 0; i < h->nal_length_size; i++)
7242 nalsize = (nalsize << 8) | buf[buf_index++];
7243 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7248 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7253 // start code prefix search
7254 for(; buf_index + 3 < buf_size; buf_index++){
7255 // This should always succeed in the first iteration.
7256 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7260 if(buf_index+3 >= buf_size) break;
7265 hx = h->thread_context[context_count];
7267 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7268 if (ptr==NULL || dst_length < 0){
7271 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7273 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7275 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7276 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7279 if (h->is_avc && (nalsize != consumed)){
7280 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7284 buf_index += consumed;
7286 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7287 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7292 switch(hx->nal_unit_type){
7294 if (h->nal_unit_type != NAL_IDR_SLICE) {
7295 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7298 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7300 init_get_bits(&hx->s.gb, ptr, bit_length);
7302 hx->inter_gb_ptr= &hx->s.gb;
7303 hx->s.data_partitioning = 0;
7305 if((err = decode_slice_header(hx, h)))
7308 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7309 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7310 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7311 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7312 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7313 && avctx->skip_frame < AVDISCARD_ALL)
7317 init_get_bits(&hx->s.gb, ptr, bit_length);
7319 hx->inter_gb_ptr= NULL;
7320 hx->s.data_partitioning = 1;
7322 err = decode_slice_header(hx, h);
7325 init_get_bits(&hx->intra_gb, ptr, bit_length);
7326 hx->intra_gb_ptr= &hx->intra_gb;
7329 init_get_bits(&hx->inter_gb, ptr, bit_length);
7330 hx->inter_gb_ptr= &hx->inter_gb;
7332 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7333 && s->context_initialized
7335 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7336 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7337 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7338 && avctx->skip_frame < AVDISCARD_ALL)
7342 init_get_bits(&s->gb, ptr, bit_length);
7346 init_get_bits(&s->gb, ptr, bit_length);
7347 decode_seq_parameter_set(h);
7349 if(s->flags& CODEC_FLAG_LOW_DELAY)
7352 if(avctx->has_b_frames < 2)
7353 avctx->has_b_frames= !s->low_delay;
7356 init_get_bits(&s->gb, ptr, bit_length);
7358 decode_picture_parameter_set(h, bit_length);
7362 case NAL_END_SEQUENCE:
7363 case NAL_END_STREAM:
7364 case NAL_FILLER_DATA:
7366 case NAL_AUXILIARY_SLICE:
7369 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7372 if(context_count == h->max_contexts) {
7373 execute_decode_slices(h, context_count);
7378 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7380 /* Slice could not be decoded in parallel mode, copy down
7381 * NAL unit stuff to context 0 and restart. Note that
7382 * rbsp_buffer is not transferred, but since we no longer
7383 * run in parallel mode this should not be an issue. */
7384 h->nal_unit_type = hx->nal_unit_type;
7385 h->nal_ref_idc = hx->nal_ref_idc;
7391 execute_decode_slices(h, context_count);
7396 * returns the number of bytes consumed for building the current frame
/* Clamp/sanitize the parser position before returning it as the consumed
 * byte count (the final `return pos;` is elided from this listing). */
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
    if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
    if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level frame decode entry point (old pre-AVPacket API).
 * Parses extradata (avcC) on first call if needed, decodes the NAL units
 * in buf, then reorders and outputs a delayed picture if one is ready.
 *
 * NOTE(review): many original lines are elided from this listing; the
 * visible code is reproduced as-is with comments only.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @param buf       input bitstream (NULL/0 size flushes delayed pictures)
 * @param buf_size  size of buf in bytes
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        const uint8_t *buf, int buf_size)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

    /* end of stream, output what is still in the buffers */
    if (buf_size == 0) {
        //FIXME factorize this with the output code below
        out = h->delayed_pic[0];
        /* pick the delayed picture with the smallest poc */
        for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
        /* shift the remaining delayed pictures down */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
    /* First call with AVC (avcC) extradata: parse SPS/PPS out of it. */
    if(h->is_avc && !h->got_avcC) {
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;
        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = AV_RB16(p) + 2;
            if(decode_nal_units(h, p, nalsize)  != nalsize) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
        // Now store right nal length size, that will be use to parse all other nals
        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
        // Do not reparse avcC
    /* Annex-B extradata (e.g. from a container) is decoded once, up front. */
    if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)

    buf_index=decode_nal_units(h, buf, buf_size);

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");

    /* A full picture has been decoded: finish it, reorder, maybe output. */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
        Picture *out = s->current_picture_ptr;
        Picture *cur = s->current_picture_ptr;
        int i, pics, cross_idr, out_of_order, out_idx;

        s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
        s->current_picture_ptr->pict_type= s->pict_type;

            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        /* carry POC state over to the next picture */
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;

        /*
         * FIXME: Error handling code does not seem to support interlaced
         * when slices span multiple rows
         * The ff_er_add_slice calls don't work right for bottom
         * fields; they cause massive erroneous error concealing
         * Error marking covers both fields (top and bottom).
         * This causes a mismatched s->error_count
         * and a bad error table. Further, the error count goes to
         * INT_MAX when called for bottom field, because mb_y is
         * past end by one (callers fault) and resync_mb_y != 0
         * causes problems for the first MB line, too.
         */
        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
            /* Wait for second field. */

        cur->repeat_pict = 0;

        /* Signal interlacing information externally. */
        /* Prioritize picture timing SEI information over used decoding process if it exists. */
        if(h->sps.pic_struct_present_flag){
            switch (h->sei_pic_struct)
            case SEI_PIC_STRUCT_FRAME:
                cur->interlaced_frame = 0;
            case SEI_PIC_STRUCT_TOP_FIELD:
            case SEI_PIC_STRUCT_BOTTOM_FIELD:
            case SEI_PIC_STRUCT_TOP_BOTTOM:
            case SEI_PIC_STRUCT_BOTTOM_TOP:
                cur->interlaced_frame = 1;
            case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
            case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
                // Signal the possibility of telecined film externally (pic_struct 5,6)
                // From these hints, let the applications decide if they apply deinterlacing.
                cur->repeat_pict = 1;
                cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
            case SEI_PIC_STRUCT_FRAME_DOUBLING:
                // Force progressive here, as doubling interlaced frame is a bad idea.
                cur->interlaced_frame = 0;
                cur->repeat_pict = 2;
            case SEI_PIC_STRUCT_FRAME_TRIPLING:
                cur->interlaced_frame = 0;
                cur->repeat_pict = 4;
            /* Derive interlacing flag from used decoding process. */
            cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;

        if (cur->field_poc[0] != cur->field_poc[1]){
            /* Derive top_field_first from field pocs. */
            cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
            if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
                /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
                if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
                  || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
                    cur->top_field_first = 1;
                    cur->top_field_first = 0;
                /* Most likely progressive */
                cur->top_field_first = 0;

        //FIXME do something with unavailable reference frames

        /* Sort B-frames into display order */

        if(h->sps.bitstream_restriction_flag
           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
            s->avctx->has_b_frames = h->sps.num_reorder_frames;

        if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
           && !h->sps.bitstream_restriction_flag){
            s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;

        while(h->delayed_pic[pics]) pics++;

        assert(pics <= MAX_DELAYED_PIC_COUNT);

        h->delayed_pic[pics++] = cur;
        /* keep the picture alive while it sits in the reorder buffer */
        if(cur->reference == 0)
            cur->reference = DELAYED_PIC_REF;

        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
        cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;

        out_of_order = !cross_idr && out->poc < h->outputed_poc;

        if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
           ((!cross_idr && out->poc > h->outputed_poc + 2)
              || cur->pict_type == FF_B_TYPE)))
            /* adaptively grow the reorder delay */
            s->avctx->has_b_frames++;

        if(out_of_order || pics > s->avctx->has_b_frames){
            out->reference &= ~DELAYED_PIC_REF;
            for(i=out_idx; h->delayed_pic[i]; i++)
                h->delayed_pic[i] = h->delayed_pic[i+1];
        if(!out_of_order && pics > s->avctx->has_b_frames){
            *data_size = sizeof(AVFrame);

            h->outputed_poc = out->poc;
            *pict= *(AVFrame*)out;
            av_log(avctx, AV_LOG_DEBUG, "no picture\n");

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    /* Return the Picture timestamp as the frame number */
    /* we subtract 1 because it is added on utils.c */
    avctx->frame_number = s->picture_number - 1;

    return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with per-neighbour availability flags for the current
 * macroblock: a neighbour counts as available only if it lies inside the
 * picture AND belongs to the same slice (slice_table match).
 * Indices (from the offsets used below): 0=top-left, 1=top, 2=top-right,
 * 3=left; 4/5 are constants (see FIXMEs).
 * NOTE(review): the branch for the top picture row is elided from this
 * listing; code below is reproduced as visible. */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    h->mb_avail[3]= s->mb_x                 && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (interior of the #ifdef TEST main(); the function
 * signature and many lines are elided from this listing). Exercises
 * exp-Golomb read/write round-trips, the 4x4 (I)DCT, the quantizer and
 * the NAL escape/unescape layer. */
#define SIZE (COUNT*40)
//    int int_temp[10000];
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    /* --- unsigned exp-Golomb write/read round-trip --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_ue_golomb(&gb);
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_ue_golomb");

    /* --- signed exp-Golomb write/read round-trip --- */
    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        s= show_bits(&gb, 24);
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        STOP_TIMER("get_se_golomb");

    /* --- 4x4 DCT -> quant-ish scaling -> IDCT error measurement --- */
    printf("testing 4x4 (I)DCT\n");
        uint8_t src[16], ref[16];
        uint64_t error= 0, max_error=0;

        for(i=0; i<COUNT; i++){
//            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
            for(j=0; j<16; j++){
                ref[j]= random()%255;
                src[j]= random()%255;

            h264_diff_dct_c(block, src, ref, 4);

            for(j=0; j<16; j++){
//                printf("%d ", block[j]);
                block[j]= block[j]*4;
                if(j&1) block[j]= (block[j]*4 + 2)/5;
                if(j&4) block[j]= (block[j]*4 + 2)/5;

            s->dsp.h264_idct_add(ref, block, 4);
/*            for(j=0; j<16; j++){
                printf("%d ", ref[j]);

            /* accumulate reconstruction error vs the original source */
            for(j=0; j<16; j++){
                int diff= FFABS(src[j] - ref[j]);

                max_error= FFMAX(max_error, diff);
        printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
        printf("testing quantizer\n");
        for(qp=0; qp<52; qp++){
                src1_block[i]= src2_block[i]= random()%255;

        /* --- NAL escape/unescape round-trip --- */
        printf("Testing NAL layer\n");
        uint8_t bitstream[COUNT];
        uint8_t nal[COUNT*2];
        memset(&h, 0, sizeof(H264Context));

        for(i=0; i<COUNT; i++){
            /* fill with non-zero bytes, then punch random zeros in */
            for(j=0; j<COUNT; j++){
                bitstream[j]= (random() % 255) + 1;
            for(j=0; j<zeros; j++){
                int pos= random() % COUNT;
                while(bitstream[pos] == 0){
            nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
                printf("encoding failed\n");
            out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
            if(out_length != COUNT){
                printf("incorrect length %d %d\n", out_length, COUNT);
            if(consumed != nal_length){
                printf("incorrect consumed length %d %d\n", nal_length, consumed);
            if(memcmp(bitstream, out, COUNT)){
                printf("mismatch\n");
        printf("Testing RBSP\n");
/* Codec close callback: free the NAL unescape buffers, the per-context
 * tables and every cached SPS/PPS. av_freep() NULLs the pointers, so a
 * double close is harmless. (Some lines, e.g. the loop variable
 * declaration and final return, are elided from this listing.) */
static av_cold int decode_end(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;

    av_freep(&h->rbsp_buffer[0]);
    av_freep(&h->rbsp_buffer[1]);
    free_tables(h); //FIXME cleanup init stuff perhaps

    for(i = 0; i < MAX_SPS_COUNT; i++)
        av_freep(h->sps_buffers + i);

    for(i = 0; i < MAX_PPS_COUNT; i++)
        av_freep(h->pps_buffers + i);

//    memset(h, 0, sizeof(H264Context));
/* Registration entry for the H.264 decoder (positional AVCodec fields such
 * as name/type/id and the init/close/decode callbacks are elided from this
 * listing). */
AVCodec h264_decoder = {
    sizeof(H264Context),       /* priv_data_size */
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),