2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4

/* CAVLC coeff_token VLCs: one per nC context class, backed by one shared
 * statically sized table block. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token VLC for chroma DC (2x2) blocks. */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* total_zeros VLCs, one per total_coeff value (1..15). */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros VLCs for chroma DC blocks. */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* run_before VLCs for zerosLeft 1..6. */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* Table storage for the run_before VLC used when zerosLeft > 6.
 * NOTE(review): the matching "static VLC run7_vlc;" declaration is not
 * visible in this chunk. */
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* Forward declarations for SVQ3 helpers and the deblocking filter. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into one 32-bit word in memory order:
 * 'a' lands in the lower-addressed half, 'b' in the higher-addressed half,
 * regardless of host endianness.
 * (Restored the dropped #else/#endif and closing brace; both branches were
 * already visible.)
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
#else
    return (a&0xFFFF) + (b<<16);
#endif
}
/* rem6[qp] == qp % 6 for qp in 0..51; avoids a division in dequant setup.
 * (Restored the dropped "};" terminator.) */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* div6[qp] == qp / 6 for qp in 0..51; avoids a division in dequant setup.
 * (Restored the dropped "};" terminator.) */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
97 static const int left_block_options[4][8]={
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
139 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
140 top_xy -= s->mb_stride;
142 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
143 topleft_xy -= s->mb_stride;
144 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
145 topleft_xy += s->mb_stride;
146 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
147 topleft_partition = 0;
149 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
150 topright_xy -= s->mb_stride;
152 if (left_mb_field_flag != curr_mb_field_flag) {
153 left_xy[1] = left_xy[0] = pair_xy - 1;
154 if (curr_mb_field_flag) {
155 left_xy[1] += s->mb_stride;
156 left_block = left_block_options[3];
158 left_block= left_block_options[2 - bottom];
163 h->top_mb_xy = top_xy;
164 h->left_mb_xy[0] = left_xy[0];
165 h->left_mb_xy[1] = left_xy[1];
169 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
170 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
171 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
173 if(MB_MBAFF && !IS_INTRA(mb_type)){
175 for(list=0; list<h->list_count; list++){
176 //These values where changed for ease of performing MC, we need to change them back
177 //FIXME maybe we can make MC and loop filter use the same values or prevent
178 //the MC code from changing ref_cache and rather use a temporary array.
179 if(USES_LIST(mb_type,list)){
180 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
181 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
182 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
184 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
185 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
191 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
192 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
193 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
194 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
196 if(IS_INTRA(mb_type)){
197 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
198 h->topleft_samples_available=
199 h->top_samples_available=
200 h->left_samples_available= 0xFFFF;
201 h->topright_samples_available= 0xEEEA;
203 if(!(top_type & type_mask)){
204 h->topleft_samples_available= 0xB3FF;
205 h->top_samples_available= 0x33FF;
206 h->topright_samples_available= 0x26EA;
208 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
209 if(IS_INTERLACED(mb_type)){
210 if(!(left_type[0] & type_mask)){
211 h->topleft_samples_available&= 0xDFFF;
212 h->left_samples_available&= 0x5FFF;
214 if(!(left_type[1] & type_mask)){
215 h->topleft_samples_available&= 0xFF5F;
216 h->left_samples_available&= 0xFF5F;
219 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
220 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
221 assert(left_xy[0] == left_xy[1]);
222 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
223 h->topleft_samples_available&= 0xDF5F;
224 h->left_samples_available&= 0x5F5F;
228 if(!(left_type[0] & type_mask)){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(topleft_type & type_mask))
235 h->topleft_samples_available&= 0x7FFF;
237 if(!(topright_type & type_mask))
238 h->topright_samples_available&= 0xFBFF;
240 if(IS_INTRA4x4(mb_type)){
241 if(IS_INTRA4x4(top_type)){
242 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
243 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
244 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
245 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
248 if(!(top_type & type_mask))
253 h->intra4x4_pred_mode_cache[4+8*0]=
254 h->intra4x4_pred_mode_cache[5+8*0]=
255 h->intra4x4_pred_mode_cache[6+8*0]=
256 h->intra4x4_pred_mode_cache[7+8*0]= pred;
259 if(IS_INTRA4x4(left_type[i])){
260 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
261 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
264 if(!(left_type[i] & type_mask))
269 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
270 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
286 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
288 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
289 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
290 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
291 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
293 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
294 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
296 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
297 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
300 h->non_zero_count_cache[4+8*0]=
301 h->non_zero_count_cache[5+8*0]=
302 h->non_zero_count_cache[6+8*0]=
303 h->non_zero_count_cache[7+8*0]=
305 h->non_zero_count_cache[1+8*0]=
306 h->non_zero_count_cache[2+8*0]=
308 h->non_zero_count_cache[1+8*3]=
309 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
313 for (i=0; i<2; i++) {
315 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
316 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
317 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
318 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
320 h->non_zero_count_cache[3+8*1 + 2*8*i]=
321 h->non_zero_count_cache[3+8*2 + 2*8*i]=
322 h->non_zero_count_cache[0+8*1 + 8*i]=
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
330 h->top_cbp = h->cbp_table[top_xy];
331 } else if(IS_INTRA(mb_type)) {
338 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
339 } else if(IS_INTRA(mb_type)) {
345 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
348 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
353 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
355 for(list=0; list<h->list_count; list++){
356 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
357 /*if(!h->mv_cache_clean[list]){
358 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
359 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
360 h->mv_cache_clean[list]= 1;
364 h->mv_cache_clean[list]= 0;
366 if(USES_LIST(top_type, list)){
367 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
368 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
369 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
370 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
371 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
373 h->ref_cache[list][scan8[0] + 0 - 1*8]=
374 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
375 h->ref_cache[list][scan8[0] + 2 - 1*8]=
376 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
378 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
379 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
380 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
382 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
386 int cache_idx = scan8[0] - 1 + i*2*8;
387 if(USES_LIST(left_type[i], list)){
388 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
389 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
390 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
391 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
392 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
393 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
395 *(uint32_t*)h->mv_cache [list][cache_idx ]=
396 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
397 h->ref_cache[list][cache_idx ]=
398 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
402 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
405 if(USES_LIST(topleft_type, list)){
406 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
407 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
408 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
409 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
415 if(USES_LIST(topright_type, list)){
416 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
417 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
418 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
419 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
421 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
425 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
428 h->ref_cache[list][scan8[5 ]+1] =
429 h->ref_cache[list][scan8[7 ]+1] =
430 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
431 h->ref_cache[list][scan8[4 ]] =
432 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
433 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
434 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
435 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
436 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
437 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
440 /* XXX beurk, Load mvd */
441 if(USES_LIST(top_type, list)){
442 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
445 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
448 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
449 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
450 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
453 if(USES_LIST(left_type[0], list)){
454 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
458 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
461 if(USES_LIST(left_type[1], list)){
462 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
463 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
466 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
469 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
470 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
471 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
472 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
473 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
475 if(h->slice_type_nos == FF_B_TYPE){
476 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
478 if(IS_DIRECT(top_type)){
479 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
480 }else if(IS_8X8(top_type)){
481 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
482 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
483 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
488 if(IS_DIRECT(left_type[0]))
489 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
490 else if(IS_8X8(left_type[0]))
491 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
493 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
495 if(IS_DIRECT(left_type[1]))
496 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
497 else if(IS_8X8(left_type[1]))
498 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
500 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
506 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
507 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
508 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
509 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
510 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
512 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
513 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
514 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
515 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
517 #define MAP_F2F(idx, mb_type)\
518 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
519 h->ref_cache[list][idx] <<= 1;\
520 h->mv_cache[list][idx][1] /= 2;\
521 h->mvd_cache[list][idx][1] /= 2;\
526 #define MAP_F2F(idx, mb_type)\
527 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] >>= 1;\
529 h->mv_cache[list][idx][1] <<= 1;\
530 h->mvd_cache[list][idx][1] <<= 1;\
540 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
543 static inline void write_back_intra_pred_mode(H264Context *h){
544 const int mb_xy= h->mb_xy;
546 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
547 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
548 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
549 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
550 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
551 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
552 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
556 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
558 static inline int check_intra4x4_pred_mode(H264Context *h){
559 MpegEncContext * const s = &h->s;
560 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
561 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
564 if(!(h->top_samples_available&0x8000)){
566 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
568 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
571 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
576 if((h->left_samples_available&0x8888)!=0x8888){
577 static const int mask[4]={0x8000,0x2000,0x80,0x20};
579 if(!(h->left_samples_available&mask[i])){
580 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
582 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
585 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
592 } //FIXME cleanup like next
595 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
597 static inline int check_intra_pred_mode(H264Context *h, int mode){
598 MpegEncContext * const s = &h->s;
599 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
600 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
603 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
607 if(!(h->top_samples_available&0x8000)){
610 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 if((h->left_samples_available&0x8080) != 0x8080){
617 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
618 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
621 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
630 * gets the predicted intra4x4 prediction mode.
632 static inline int pred_intra_mode(H264Context *h, int n){
633 const int index8= scan8[n];
634 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
635 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
636 const int min= FFMIN(left, top);
638 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
640 if(min<0) return DC_PRED;
644 static inline void write_back_non_zero_count(H264Context *h){
645 const int mb_xy= h->mb_xy;
647 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
648 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
649 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
650 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
651 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
652 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
653 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
655 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
656 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
657 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
659 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
660 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
661 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
665 * gets the predicted number of non-zero coefficients.
666 * @param n block index
668 static inline int pred_non_zero_count(H264Context *h, int n){
669 const int index8= scan8[n];
670 const int left= h->non_zero_count_cache[index8 - 1];
671 const int top = h->non_zero_count_cache[index8 - 8];
674 if(i<64) i= (i+1)>>1;
676 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
681 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
682 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
683 MpegEncContext *s = &h->s;
685 /* there is no consistent mapping of mvs to neighboring locations that will
686 * make mbaff happy, so we can't move all this logic to fill_caches */
688 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
690 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
691 *C = h->mv_cache[list][scan8[0]-2];
694 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
695 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
696 if(IS_INTERLACED(mb_types[topright_xy])){
697 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
698 const int x4 = X4, y4 = Y4;\
699 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
700 if(!USES_LIST(mb_type,list))\
701 return LIST_NOT_USED;\
702 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
703 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
704 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
705 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
707 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
710 if(topright_ref == PART_NOT_AVAILABLE
711 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
712 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
714 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
715 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
718 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
720 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
721 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
727 if(topright_ref != PART_NOT_AVAILABLE){
728 *C= h->mv_cache[list][ i - 8 + part_width ];
731 tprintf(s->avctx, "topright MV not available\n");
733 *C= h->mv_cache[list][ i - 8 - 1 ];
734 return h->ref_cache[list][ i - 8 - 1 ];
739 * gets the predicted MV.
740 * @param n the block index
741 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
742 * @param mx the x component of the predicted motion vector
743 * @param my the y component of the predicted motion vector
745 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
746 const int index8= scan8[n];
747 const int top_ref= h->ref_cache[list][ index8 - 8 ];
748 const int left_ref= h->ref_cache[list][ index8 - 1 ];
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
752 int diagonal_ref, match_count;
754 assert(part_width==1 || part_width==2 || part_width==4);
764 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
765 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
766 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
767 if(match_count > 1){ //most common
768 *mx= mid_pred(A[0], B[0], C[0]);
769 *my= mid_pred(A[1], B[1], C[1]);
770 }else if(match_count==1){
774 }else if(top_ref==ref){
782 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
786 *mx= mid_pred(A[0], B[0], C[0]);
787 *my= mid_pred(A[1], B[1], C[1]);
791 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
795 * gets the directionally predicted 16x8 MV.
796 * @param n the block index
797 * @param mx the x component of the predicted motion vector
798 * @param my the y component of the predicted motion vector
800 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
802 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
803 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
805 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
814 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
826 pred_motion(h, n, 4, list, ref, mx, my);
830 * gets the directionally predicted 8x16 MV.
831 * @param n the block index
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
835 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
837 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
838 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
840 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
851 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
853 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
855 if(diagonal_ref == ref){
863 pred_motion(h, n, 2, list, ref, mx, my);
866 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
867 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
868 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
870 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
872 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
873 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
874 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
880 pred_motion(h, 0, 4, 0, 0, mx, my);
885 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
886 int poc0 = h->ref_list[0][i].poc;
887 int td = av_clip(poc1 - poc0, -128, 127);
888 if(td == 0 || h->ref_list[0][i].long_ref){
891 int tb = av_clip(poc - poc0, -128, 127);
892 int tx = (16384 + (FFABS(td) >> 1)) / td;
893 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 MpegEncContext * const s = &h->s;
899 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
900 const int poc1 = h->ref_list[1][0].poc;
902 for(field=0; field<2; field++){
903 const int poc = h->s.current_picture_ptr->field_poc[field];
904 const int poc1 = h->ref_list[1][0].field_poc[field];
905 for(i=0; i < 2*h->ref_count[0]; i++)
906 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
909 for(i=0; i<h->ref_count[0]; i++){
910 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the map from the co-located picture's reference indices to the
 * current slice's list-0 indices, matching references by a packed
 * 4*frame_num + (reference&3) key.
 * @param field    current field parity (used when mbafi)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero for the MBAFF per-field maps (entries at +16)
 * NOTE(review): listing has gaps (e.g. between 927 and 931, after 939), so
 * the first poc-adjustment branch and closing braces are not visible here.
 */
914 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
915 MpegEncContext * const s = &h->s;
916 Picture * const ref1 = &h->ref_list[1][0];
917 int j, old_ref, rfield;
918 int start= mbafi ? 16 : 0;
919 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
920 int interl= mbafi || s->picture_structure != PICT_FRAME;
922 /* bogus; fills in for missing frames */
923 memset(map[list], 0, sizeof(map[list]));
925 for(rfield=0; rfield<2; rfield++){
926 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
927 int poc = ref1->ref_poc[colfield][list][old_ref];
931 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
932 poc= (poc&~3) + rfield + 1; // re-target to the field of parity rfield
934 for(j=start; j<end; j++){
935 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
936 int cur_ref= mbafi ? (j-16)^field : j;
937 map[list][2*old_ref + (rfield^field) + 16] = cur_ref; // per-field entry
939 map[list][old_ref] = cur_ref; // frame-level entry
/**
 * Records this slice's reference counts and packed reference POCs on the
 * current Picture (so future slices can map co-located references), then —
 * for B slices using temporal direct — builds the col->list0 maps via
 * fill_colmap() for both the frame case and the two MBAFF field cases.
 * NOTE(review): listing has gaps (951, 954, 959-960, 964-965, 967, 969-970),
 * so loop-variable declarations and some early-return logic are not visible.
 */
947 static inline void direct_ref_list_init(H264Context * const h){
948 MpegEncContext * const s = &h->s;
949 Picture * const ref1 = &h->ref_list[1][0];
950 Picture * const cur = s->current_picture_ptr;
952 int sidx= (s->picture_structure&1)^1; // structure index of the current field/frame
953 int ref1sidx= (ref1->reference&1)^1;  // structure index of the co-located picture
955 for(list=0; list<2; list++){
956 cur->ref_count[sidx][list] = h->ref_count[list];
957 for(j=0; j<h->ref_count[list]; j++)
958 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
961 if(s->picture_structure == PICT_FRAME){
962 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); // both fields share frame data
963 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
966 cur->mbaff= FRAME_MBAFF;
968 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
971 for(list=0; list<2; list++){
972 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
973 for(field=0; field<2; field++)
974 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-direct macroblock
 * (or its direct 8x8 sub-blocks when is_b8x8), writing the results into
 * h->mv_cache / h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Handles both spatial direct (ref = min of neighbours, colocated-zero
 * check) and temporal direct (scale colocated MVs by dist_scale_factor),
 * plus all frame/field/MBAFF combinations of current vs colocated MB.
 * NOTE(review): this listing omits many interleaved lines (embedded line
 * numbers jump throughout), so several branches, declarations and closing
 * braces are not visible; comments below annotate only the visible code.
 */
978 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
979 MpegEncContext * const s = &h->s;
980 int b8_stride = h->b8_stride;
981 int b4_stride = h->b_stride;
982 int mb_xy = h->mb_xy;
984 const int16_t (*l1mv0)[2], (*l1mv1)[2]; // colocated (list1 pic) motion vectors
985 const int8_t *l1ref0, *l1ref1;          // colocated reference indices
986 const int is_b8x8 = IS_8X8(*mb_type);
987 unsigned int sub_mb_type;
990 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Select the colocated MB and its type(s) depending on the interlacing
 * relationship between the current MB and the list-1 picture. */
992 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
993 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
994 int cur_poc = s->current_picture_ptr->poc;
995 int *col_poc = h->ref_list[1]->field_poc;
996 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc); // pick the temporally closer field
997 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
999 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1000 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1001 mb_xy += s->mb_stride*fieldoff;
1004 }else{ // AFL/AFR/FR/FL -> AFR/FR
1005 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1006 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; // one field MB covers a frame MB pair
1007 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1008 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1011 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1012 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1013 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1015 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1016 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1021 }else{ // AFR/FR -> AFR/FR
1024 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1025 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1026 /* FIXME save sub mb types from previous frames (or derive from MVs)
1027 * so we know exactly what block size to use */
1028 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1029 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1030 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1031 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1032 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* Point at the colocated picture's MV/ref planes for the chosen mb_xy. */
1040 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1041 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1042 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1043 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1046 l1ref0 += h->b8_stride; // step to the bottom half of the colocated MB pair
1047 l1ref1 += h->b8_stride;
1048 l1mv0 += 2*b4_stride;
1049 l1mv1 += 2*b4_stride;
1053 if(h->direct_spatial_mv_pred){
1058 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1060 /* ref = min(neighbors) */
1061 for(list=0; list<2; list++){
1062 int refa = h->ref_cache[list][scan8[0] - 1];
1063 int refb = h->ref_cache[list][scan8[0] - 8];
1064 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1065 if(refc == PART_NOT_AVAILABLE)
1066 refc = h->ref_cache[list][scan8[0] - 8 - 1]; // fall back to top-left neighbour
1067 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc); // unsigned min maps negatives (unavailable) above valid refs
1072 if(ref[0] < 0 && ref[1] < 0){
1073 ref[0] = ref[1] = 0; // no neighbour refs: zero refs and zero MVs
1074 mv[0][0] = mv[0][1] =
1075 mv[1][0] = mv[1][1] = 0;
1077 for(list=0; list<2; list++){
1079 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1081 mv[list][0] = mv[list][1] = 0;
1087 *mb_type &= ~MB_TYPE_L1; // list-1 unused for this MB
1088 sub_mb_type &= ~MB_TYPE_L1;
1089 }else if(ref[0] < 0){
1091 *mb_type &= ~MB_TYPE_L0; // list-0 unused for this MB
1092 sub_mb_type &= ~MB_TYPE_L0;
/* Spatial direct with field/frame mismatch vs the colocated MB. */
1095 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1096 for(i8=0; i8<4; i8++){
1099 int xy8 = x8+y8*b8_stride;
1100 int xy4 = 3*x8+y8*b4_stride;
1103 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1105 h->sub_mb_type[i8] = sub_mb_type;
1107 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1108 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Colocated block is a near-zero-MV ref-0 block: force zero MVs. */
1109 if(!IS_INTRA(mb_type_col[y8])
1110 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1111 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1113 a= pack16to32(mv[0][0],mv[0][1]);
1115 b= pack16to32(mv[1][0],mv[1][1]);
1117 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1120 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1121 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1123 }else if(IS_16X16(*mb_type)){
1126 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1128 if(!IS_INTRA(mb_type_col[0])
1129 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1130 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1131 && (h->x264_build>33 || !h->x264_build)))){ // workaround for an old x264 encoder bug
1133 a= pack16to32(mv[0][0],mv[0][1]);
1135 b= pack16to32(mv[1][0],mv[1][1]);
1137 a= pack16to32(mv[0][0],mv[0][1]);
1138 b= pack16to32(mv[1][0],mv[1][1]);
1140 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1141 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1143 for(i8=0; i8<4; i8++){
1144 const int x8 = i8&1;
1145 const int y8 = i8>>1;
1147 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1149 h->sub_mb_type[i8] = sub_mb_type;
1151 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1152 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1153 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1154 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1157 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1158 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1159 && (h->x264_build>33 || !h->x264_build)))){
1160 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1161 if(IS_SUB_8X8(sub_mb_type)){
1162 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; // 8x8-inference corner sample
1163 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1165 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1167 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1170 for(i4=0; i4<4; i4++){
1171 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1172 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1174 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1176 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1182 }else{ /* direct temporal mv pred */
1183 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1184 const int *dist_scale_factor = h->dist_scale_factor;
1187 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1188 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0]; // per-field maps for MBAFF field MBs
1189 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1190 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1192 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1195 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1196 /* FIXME assumes direct_8x8_inference == 1 */
1197 int y_shift = 2*!IS_INTERLACED(*mb_type);
1199 for(i8=0; i8<4; i8++){
1200 const int x8 = i8&1;
1201 const int y8 = i8>>1;
1203 const int16_t (*l1mv)[2]= l1mv0;
1205 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1207 h->sub_mb_type[i8] = sub_mb_type;
1209 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); // list-1 ref is always 0 in temporal direct
1210 if(IS_INTRA(mb_type_col[y8])){
1211 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1212 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1213 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 ref0 = l1ref0[x8 + y8*b8_stride];
1219 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1221 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1224 scale = dist_scale_factor[ref0];
1225 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1228 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1229 int my_col = (mv_col[1]<<y_shift)/2; // adjust vertical MV for frame/field mismatch
1230 int mx = (scale * mv_col[0] + 128) >> 8;
1231 int my = (scale * my_col + 128) >> 8;
1232 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1233 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); // MV1 = MV0 - colocated MV
1239 /* one-to-one mv scaling */
1241 if(IS_16X16(*mb_type)){
1244 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1245 if(IS_INTRA(mb_type_col[0])){
1248 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1249 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1250 const int scale = dist_scale_factor[ref0];
1251 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1253 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1254 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1256 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1257 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1259 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1260 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1261 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1263 for(i8=0; i8<4; i8++){
1264 const int x8 = i8&1;
1265 const int y8 = i8>>1;
1267 const int16_t (*l1mv)[2]= l1mv0;
1269 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1271 h->sub_mb_type[i8] = sub_mb_type;
1272 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1273 if(IS_INTRA(mb_type_col[0])){
1274 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1275 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1276 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1282 ref0 = map_col_to_list0[0][ref0];
1284 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1287 scale = dist_scale_factor[ref0];
1289 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1290 if(IS_SUB_8X8(sub_mb_type)){
1291 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1292 int mx = (scale * mv_col[0] + 128) >> 8;
1293 int my = (scale * mv_col[1] + 128) >> 8;
1294 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1295 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1297 for(i4=0; i4<4; i4++){
1298 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1299 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1300 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1301 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1302 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1303 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache / ref_cache / mvd_cache /
 * sub_mb_type direct flags) back into the frame-level arrays of the
 * current picture so neighbouring MBs and later passes can read them.
 * NOTE(review): listing has gaps (1314-1315, 1318, 1320, 1322-1324, ...),
 * so loop headers and some else-branches are not visible here.
 */
1310 static inline void write_back_motion(H264Context *h, int mb_type){
1311 MpegEncContext * const s = &h->s;
1312 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;  // 4x4-block coordinates
1313 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; // 8x8-block coordinates
1316 if(!USES_LIST(mb_type, 0))
1317 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1319 for(list=0; list<h->list_count; list++){
1321 if(!USES_LIST(mb_type, list))
1325 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; // copy 2 MVs at once
1326 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1328 if( h->pps.cabac ) {
1329 if(IS_SKIP(mb_type))
1330 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); // skipped MBs have zero MV deltas
1333 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1339 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1340 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1341 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1342 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1343 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1347 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1348 if(IS_8X8(mb_type)){
1349 uint8_t *direct_table = &h->direct_table[b8_xy];
1350 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1351 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1352 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1358 * Decodes a network abstraction layer unit.
1359 * @param consumed is the number of bytes used as input
1360 * @param length is the length of the array
1361 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1362 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Parses the NAL header byte and removes 0x000003 emulation-prevention
 * escapes from the payload.  Returns src+1 directly when no escape bytes
 * are present; otherwise unescapes into h->rbsp_buffer[] and returns that.
 * NOTE(review): listing has gaps (e.g. 1365-1368, 1399-1407, 1417-1421,
 * 1428-1431), so declarations, the "skipped" branch and loop headers are
 * not visible here.
 */
1364 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369 // src[0]&0x80; //forbidden bit
1370 h->nal_ref_idc= src[0]>>5;
1371 h->nal_unit_type= src[0]&0x1F;
1375 for(i=0; i<length; i++)
1376 printf("%2X ", src[i]);
/* Fast scan for a 00 00 xx pattern, word-at-a-time where unaligned loads
 * are cheap, else byte pairs. */
1379 #ifdef HAVE_FAST_UNALIGNED
1380 # ifdef HAVE_FAST_64BIT
1382 for(i=0; i+1<length; i+=9){
1383 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) // no zero byte in this word
1386 for(i=0; i+1<length; i+=5){
1387 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1390 if(i>0 && !src[i]) i--; // step back onto the first zero of a run
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
1398 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1400 /* startcode, so we must be past the end */
1408 if(i>=length-1){ //no escaped 0
1409 *dst_length= length;
1410 *consumed= length+1; //+1 for the header
1414 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1415 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1416 dst= h->rbsp_buffer[bufidx];
1422 //printf("decoding esc\n");
1425 //remove escapes (very rare 1:2^22)
1426 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1427 if(src[si+2]==3){ //escape
1432 }else //next start code
1436 dst[di++]= src[si++];
1439 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); // padding so the bit reader can over-read safely
1442 *consumed= si + 1;//+1 for the header
1443 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1448 * identifies the exact end of the bitstream
1449 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit at the end of the RBSP; per the doc
 * comment above, returns the trailing length or 0 if damaged.
 * NOTE(review): only two lines of this function are visible in this
 * listing (1452-1454 and the tail are missing). */
1451 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1455 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1465 * IDCT transforms the 16 dc values and dequantizes them.
1466 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard-style transform of the 16 luma DC coefficients,
 * followed by dequantization by qmul (result = (x*qmul + 128) >> 8).
 * The DC values sit in the block at the strided x/y offsets below.
 * NOTE(review): listing has gaps (1469-1470, 1483-1490, 1496, 1501+), so
 * the temp[] store of the row pass and the closing braces are not visible.
 */
1468 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1471 int temp[16]; //FIXME check if this is a good idea
1472 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1473 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1475 //memset(block, 64, 2*256);
/* first (vertical) butterfly pass into temp[] */
1478 const int offset= y_offset[i];
1479 const int z0= block[offset+stride*0] + block[offset+stride*4];
1480 const int z1= block[offset+stride*0] - block[offset+stride*4];
1481 const int z2= block[offset+stride*1] - block[offset+stride*5];
1482 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterfly again, dequantize, write back */
1491 const int offset= x_offset[i];
1492 const int z0= temp[4*0+i] + temp[4*2+i];
1493 const int z1= temp[4*0+i] - temp[4*2+i];
1494 const int z2= temp[4*1+i] - temp[4*3+i];
1495 const int z3= temp[4*1+i] + temp[4*3+i];
1497 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1498 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1499 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1500 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1506 * DCT transforms the 16 dc values.
1507 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard-style transform of the 16 luma DC values (encoder
 * side counterpart of h264_luma_dc_dequant_idct_c); outputs are halved
 * via >>1.  Quantization is commented out / left to the caller.
 * NOTE(review): listing has gaps (1511, 1515-1516, 1522-1529, 1535, 1540+).
 */
1509 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1510 // const int qmul= dequant_coeff[qp][0];
1512 int temp[16]; //FIXME check if this is a good idea
1513 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1514 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass into temp[] */
1517 const int offset= y_offset[i];
1518 const int z0= block[offset+stride*0] + block[offset+stride*4];
1519 const int z1= block[offset+stride*0] - block[offset+stride*4];
1520 const int z2= block[offset+stride*1] - block[offset+stride*5];
1521 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass, halved result written back */
1530 const int offset= x_offset[i];
1531 const int z0= temp[4*0+i] + temp[4*2+i];
1532 const int z1= temp[4*0+i] - temp[4*2+i];
1533 const int z2= temp[4*1+i] - temp[4*3+i];
1534 const int z3= temp[4*1+i] + temp[4*3+i];
1536 block[stride*0 +offset]= (z0 + z3)>>1;
1537 block[stride*2 +offset]= (z1 + z2)>>1;
1538 block[stride*8 +offset]= (z1 - z2)>>1;
1539 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 transform + dequantization of the 4 chroma DC coefficients
 * (stored at stride/xStride spacings inside the block).
 * NOTE(review): listing has gaps (1550-1551, 1556-1561), so the local
 * declarations and intermediate butterfly (computing e from a/b/c/d) are
 * not visible here.
 */
1547 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1548 const int stride= 16*2;
1549 const int xStride= 16;
1552 a= block[stride*0 + xStride*0];
1553 b= block[stride*0 + xStride*1];
1554 c= block[stride*1 + xStride*0];
1555 d= block[stride*1 + xStride*1];
1562 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1563 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1564 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1565 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values (encoder-side
 * counterpart of chroma_dc_dequant_idct_c); no quantization here.
 * NOTE(review): listing has gaps (1572-1573, 1578-1583), so the local
 * declarations and the butterfly producing e are not visible here.
 */
1569 static void chroma_dc_dct_c(DCTELEM *block){
1570 const int stride= 16*2;
1571 const int xStride= 16;
1574 a= block[stride*0 + xStride*0];
1575 b= block[stride*0 + xStride*1];
1576 c= block[stride*1 + xStride*0];
1577 d= block[stride*1 + xStride*1];
1584 block[stride*0 + xStride*0]= (a+c);
1585 block[stride*0 + xStride*1]= (e+b);
1586 block[stride*1 + xStride*0]= (a-c);
1587 block[stride*1 + xStride*1]= (e-b);
1592 * gets the chroma qp.
/* Looks up the chroma QP for chroma index t (Cb/Cr) and luma qscale via
 * the PPS-derived table.  NOTE(review): the closing brace line (1596) is
 * missing from this listing. */
1594 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1595 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition in one direction (one list): fetches
 * the quarter-pel luma and eighth-pel chroma prediction from pic into
 * dest_*, going through edge emulation when the MV points outside the
 * padded picture.
 * NOTE(review): listing has gaps (1610, 1615, 1625-1627, 1631-1635,
 * 1642-1643, 1648-1649, ...), so the emu flag declaration/handling and
 * several braces are not visible here.
 */
1598 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1599 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1600 int src_x_offset, int src_y_offset,
1601 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1602 MpegEncContext * const s = &h->s;
1603 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; // quarter-pel x
1604 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;       // quarter-pel y
1605 const int luma_xy= (mx&3) + ((my&3)<<2); // selects the qpel interpolation function
1606 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1607 uint8_t * src_cb, * src_cr;
1608 int extra_width= h->emu_edge_width;
1609 int extra_height= h->emu_edge_height;
1611 const int full_mx= mx>>2;
1612 const int full_my= my>>2;
1613 const int pic_width = 16*s->mb_width;
1614 const int pic_height = 16*s->mb_height >> MB_FIELD;
1616 if(mx&7) extra_width -= 3;  // subpel interpolation needs 3 extra samples
1617 if(my&7) extra_height -= 3;
1619 if( full_mx < 0-extra_width
1620 || full_my < 0-extra_height
1621 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1622 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1623 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1624 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1628 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1630 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1633 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return; // luma-only decode: skip chroma
1636 // chroma offset when predicting from a field of opposite parity
1637 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1638 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1640 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1641 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1644 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1645 src_cb= s->edge_emu_buffer;
1647 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1650 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1651 src_cr= s->edge_emu_buffer;
1653 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (non-weighted) prediction of one partition: list-0 prediction
 * with the put functions, then (for bi-prediction) list-1 prediction with
 * the avg functions so the results are averaged into dest_*.
 * NOTE(review): listing has gaps (1665, 1671-1672, 1677-1682, ...), so the
 * if(list0)/if(list1) guards and the qpix_op reassignment are only partly
 * visible.
 */
1656 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1657 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1658 int x_offset, int y_offset,
1659 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1660 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1661 int list0, int list1){
1662 MpegEncContext * const s = &h->s;
1663 qpel_mc_func *qpix_op= qpix_put;     // first prediction writes, second averages
1664 h264_chroma_mc_func chroma_op= chroma_put;
1666 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; // move dest to the partition's position
1667 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1668 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1669 x_offset += 8*s->mb_x;
1670 y_offset += 8*(s->mb_y >> MB_FIELD);
1673 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1674 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1675 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1676 qpix_op, chroma_op);
1679 chroma_op= chroma_avg; // second pass averages with the first
1683 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1684 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1685 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1686 qpix_op, chroma_op);
/**
 * Weighted prediction of one partition.  Bi-directional case: predict
 * both lists into dest_* and the obmc_scratchpad, then combine with
 * implicit (use_weight==2) or explicit biweights.  Uni-directional case:
 * predict then apply explicit per-list luma/chroma weights in place.
 * NOTE(review): listing has gaps (1698, 1704-1705, 1713, 1720, 1727,
 * 1737-1738, 1745, 1753-...), so the if(list0 && list1)/else guards and
 * closing braces are only partly visible.
 */
1690 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1691 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692 int x_offset, int y_offset,
1693 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1694 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1695 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1696 int list0, int list1){
1697 MpegEncContext * const s = &h->s;
1699 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1700 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1701 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1702 x_offset += 8*s->mb_x;
1703 y_offset += 8*(s->mb_y >> MB_FIELD);
1706 /* don't optimize for luma-only case, since B-frames usually
1707 * use implicit weights => chroma too. */
1708 uint8_t *tmp_cb = s->obmc_scratchpad;
1709 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1710 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1711 int refn0 = h->ref_cache[0][ scan8[n] ];
1712 int refn1 = h->ref_cache[1][ scan8[n] ];
1714 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1715 dest_y, dest_cb, dest_cr,
1716 x_offset, y_offset, qpix_put, chroma_put);
1717 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1718 tmp_y, tmp_cb, tmp_cr,
1719 x_offset, y_offset, qpix_put, chroma_put);
1721 if(h->use_weight == 2){ // implicit weighting: weights derived from POC distances
1722 int weight0 = h->implicit_weight[refn0][refn1];
1723 int weight1 = 64 - weight0;
1724 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1725 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1726 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1728 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1729 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1730 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1731 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1732 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1733 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1734 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1735 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1736 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1739 int list = list1 ? 1 : 0; // uni-directional: pick whichever list is used
1740 int refn = h->ref_cache[list][ scan8[n] ];
1741 Picture *ref= &h->ref_list[list][refn];
1742 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1743 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1744 qpix_put, chroma_put);
1746 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1747 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1748 if(h->use_weight_chroma){
1749 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1750 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1751 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1752 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted when explicit weights are on (use_weight==1) or when implicit
 * bi-prediction has a non-trivial weight (!= 32); standard otherwise.
 * NOTE(review): the else line (1770) between the two calls is missing
 * from this listing.
 */
1757 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1758 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1759 int x_offset, int y_offset,
1760 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1761 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1762 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1763 int list0, int list1){
1764 if((h->use_weight==2 && list0 && list1
1765 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1766 || h->use_weight==1)
1767 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1768 x_offset, y_offset, qpix_put, chroma_put,
1769 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1771 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * 16x16 MV of this MB points to, offset ahead so the data is resident by
 * the time it is actually read.
 * NOTE(review): the refn validity guard (line 1780) and closing braces
 * are missing from this listing.
 */
1775 static inline void prefetch_motion(H264Context *h, int list){
1776 /* fetch pixels for estimated mv 4 macroblocks ahead
1777 * optimized for 64byte cache lines */
1778 MpegEncContext * const s = &h->s;
1779 const int refn = h->ref_cache[list][scan8[0]];
1781 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1782 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1783 uint8_t **src= h->ref_list[list][refn].data;
1784 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1785 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1786 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; // chroma is half resolution
1787 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching
 * to mc_part() per partition according to the MB partitioning
 * (16x16 / 16x8 / 8x16 / 8x8 with 8x8, 8x4, 4x8 or 4x4 sub-partitions).
 * The weight_op/weight_avg indices select the function sized for each
 * partition shape.
 * NOTE(review): listing has gaps (1798, 1800, 1802, 1826-1831, 1833,
 * 1836, 1860-1863, 1870-1874), so the 8x8 loop header, n's definition and
 * several braces are not visible here.
 */
1791 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1792 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1793 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1794 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1795 MpegEncContext * const s = &h->s;
1796 const int mb_xy= h->mb_xy;
1797 const int mb_type= s->current_picture.mb_type[mb_xy];
1799 assert(IS_INTER(mb_type));
1801 prefetch_motion(h, 0);
1803 if(IS_16X16(mb_type)){
1804 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1806 &weight_op[0], &weight_avg[0],
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 }else if(IS_16X8(mb_type)){
1809 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1810 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1811 &weight_op[1], &weight_avg[1],
1812 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1813 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1817 }else if(IS_8X16(mb_type)){
1818 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1819 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1820 &weight_op[2], &weight_avg[2],
1821 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1822 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 assert(IS_8X8(mb_type));
/* per-8x8 sub-partition dispatch */
1832 const int sub_mb_type= h->sub_mb_type[i];
1834 int x_offset= (i&1)<<2;
1835 int y_offset= (i&2)<<1;
1837 if(IS_SUB_8X8(sub_mb_type)){
1838 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1840 &weight_op[3], &weight_avg[3],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 }else if(IS_SUB_8X4(sub_mb_type)){
1843 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1844 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1845 &weight_op[4], &weight_avg[4],
1846 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1847 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 }else if(IS_SUB_4X8(sub_mb_type)){
1852 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1853 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1854 &weight_op[5], &weight_avg[5],
1855 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862 assert(IS_SUB_4X4(sub_mb_type));
1864 int sub_x_offset= x_offset + 2*(j&1);
1865 int sub_y_offset= y_offset + (j&2);
1866 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1867 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1868 &weight_op[6], &weight_avg[6],
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1875 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run, and their chroma-DC variants) into the static VLC_TYPE arrays
 * declared at the top of the file, using INIT_VLC_USE_NEW_STATIC.
 * NOTE(review): listing has gaps (1880-1885, 1892-1894, 1902-1903,
 * 1907, 1909-1910, 1918, 1927-1929, 1933, 1937, 1944+), so the done-guard
 * body, loop headers and some init_vlc() argument lines are not visible.
 */
1878 static av_cold void decode_init_vlc(void){
1879 static int done = 0; // guards against repeated initialization
1886 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1887 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1888 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1889 &chroma_dc_coeff_token_len [0], 1, 1,
1890 &chroma_dc_coeff_token_bits[0], 1, 1,
1891 INIT_VLC_USE_NEW_STATIC);
1895 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; // the four tables are packed into one array
1896 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1897 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1898 &coeff_token_len [i][0], 1, 1,
1899 &coeff_token_bits[i][0], 1, 1,
1900 INIT_VLC_USE_NEW_STATIC);
1901 offset += coeff_token_vlc_tables_size[i];
1904 * This is a one time safety check to make sure that
1905 * the packed static coeff_token_vlc table sizes
1906 * were initialized correctly.
1908 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1911 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1912 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1913 init_vlc(&chroma_dc_total_zeros_vlc[i],
1914 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1915 &chroma_dc_total_zeros_len [i][0], 1, 1,
1916 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1917 INIT_VLC_USE_NEW_STATIC);
1919 for(i=0; i<15; i++){
1920 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1921 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1922 init_vlc(&total_zeros_vlc[i],
1923 TOTAL_ZEROS_VLC_BITS, 16,
1924 &total_zeros_len [i][0], 1, 1,
1925 &total_zeros_bits[i][0], 1, 1,
1926 INIT_VLC_USE_NEW_STATIC);
1930 run_vlc[i].table = run_vlc_tables[i];
1931 run_vlc[i].table_allocated = run_vlc_tables_size;
1932 init_vlc(&run_vlc[i],
1934 &run_len [i][0], 1, 1,
1935 &run_bits[i][0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1938 run7_vlc.table = run7_vlc_table, // note: comma operator, original style kept
1939 run7_vlc.table_allocated = run7_vlc_table_size;
1940 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1941 &run_len [6][0], 1, 1,
1942 &run_bits[6][0], 1, 1,
1943 INIT_VLC_USE_NEW_STATIC);
/* Free every per-context table allocated by alloc_tables()/context_init().
 * av_freep() NULLs the pointers, so calling this twice is harmless. */
1947 static void free_tables(H264Context *h){
1950 av_freep(&h->intra4x4_pred_mode);
1951 av_freep(&h->chroma_pred_mode_table);
1952 av_freep(&h->cbp_table);
1953 av_freep(&h->mvd_table[0]);
1954 av_freep(&h->mvd_table[1]);
1955 av_freep(&h->direct_table);
1956 av_freep(&h->non_zero_count);
1957 av_freep(&h->slice_table_base);
/* slice_table is only an offset alias into slice_table_base */
1958 h->slice_table= NULL;
1960 av_freep(&h->mb2b_xy);
1961 av_freep(&h->mb2b8_xy);
/* each slice-thread context owns its own border/scratch buffers */
1963 for(i = 0; i < h->s.avctx->thread_count; i++) {
1964 hx = h->thread_context[i];
1966 av_freep(&hx->top_borders[1]);
1967 av_freep(&hx->top_borders[0]);
1968 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices.  When both matrices are identical, table 1 aliases
 * table 0 to avoid recomputation.  'transpose' reorders coefficients to the
 * layout expected by the active (possibly SIMD) idct implementation. */
1972 static void init_dequant8_coeff_table(H264Context *h){
1974 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1975 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1976 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1978 for(i=0; i<2; i++ ){
/* identical scaling matrices -> share the first buffer */
1979 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1980 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1984 for(q=0; q<52; q++){
/* per-QP scale: base coefficient * scaling matrix entry << (qp/6) */
1985 int shift = div6[q];
1988 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1989 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1990 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb, Cr)
 * for all 52 QP values from the PPS scaling matrices; identical matrices
 * share one buffer.  'transpose' matches the active idct's coefficient
 * layout (C vs. SIMD). */
1995 static void init_dequant4_coeff_table(H264Context *h){
1997 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1998 for(i=0; i<6; i++ ){
1999 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* alias onto an earlier table whose scaling matrix is identical */
2001 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2002 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2009 for(q=0; q<52; q++){
/* +2 compared to the 8x8 case; see the H.264 dequant formulas */
2010 int shift = div6[q] + 2;
2013 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2014 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2015 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS.  8x8 tables are only
 * needed when transform_8x8_mode is on.  With lossless transform_bypass,
 * the qp==0 entries are forced to the identity scale (1<<6). */
2020 static void init_dequant_tables(H264Context *h){
2022 init_dequant4_coeff_table(h);
2023 if(h->pps.transform_8x8_mode)
2024 init_dequant8_coeff_table(h);
2025 if(h->sps.transform_bypass){
2028 h->dequant4_coeff[i][0][x] = 1<<6;
2029 if(h->pps.transform_8x8_mode)
2032 h->dequant8_coeff[i][0][x] = 1<<6;
2039 * needs width/height
2041 static int alloc_tables(H264Context *h){
2042 MpegEncContext * const s = &h->s;
/* one extra macroblock row of padding above the picture */
2043 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2046 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2049 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2050 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2052 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2053 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2054 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2055 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 (all bits set) marks "belongs to no slice"; slice_table skips the
 * padding rows so out-of-picture neighbors read as a different slice */
2057 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2058 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2060 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2061 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* precompute mb index -> 4x4 (b) and 8x8 (b8) motion-grid indices */
2062 for(y=0; y<s->mb_height; y++){
2063 for(x=0; x<s->mb_width; x++){
2064 const int mb_xy= x + y*s->mb_stride;
2065 const int b_xy = 4*x + 4*y*h->b_stride;
2066 const int b8_xy= 2*x + 2*y*h->b8_stride;
2068 h->mb2b_xy [mb_xy]= b_xy;
2069 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2073 s->obmc_scratchpad = NULL;
2075 if(!h->dequant4_coeff[0])
2076 init_dequant_tables(h);
2085 * Mimic alloc_tables(), but for every context thread.
2087 static void clone_tables(H264Context *dst, H264Context *src){
/* shallow-copy shared table pointers; src keeps ownership, so only the
 * master context may free them (see free_tables) */
2088 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2089 dst->non_zero_count = src->non_zero_count;
2090 dst->slice_table = src->slice_table;
2091 dst->cbp_table = src->cbp_table;
2092 dst->mb2b_xy = src->mb2b_xy;
2093 dst->mb2b8_xy = src->mb2b8_xy;
2094 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2095 dst->mvd_table[0] = src->mvd_table[0];
2096 dst->mvd_table[1] = src->mvd_table[1];
2097 dst->direct_table = src->direct_table;
/* per-thread scratchpad is allocated lazily in frame_start() */
2099 dst->s.obmc_scratchpad = NULL;
2100 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2105 * Allocate buffers which are not shared amongst multiple threads.
2107 static int context_init(H264Context *h){
/* per-mb top border cache: 16 luma + 8 Cb + 8 Cr bytes; two rows to cover
 * the two lines needed by MBAFF mb pairs */
2108 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2109 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2113 return -1; // free_tables will clean up for us
/* Initialization shared by the H.264 decoder (and SVQ3, which reuses this
 * context): basic geometry, prediction functions, dsputil, and flat default
 * scaling matrices (all 16s) in case the bitstream supplies none. */
2116 static av_cold void common_init(H264Context *h){
2117 MpegEncContext * const s = &h->s;
2119 s->width = s->avctx->width;
2120 s->height = s->avctx->height;
2121 s->codec_id= s->avctx->codec->id;
2123 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 == "no PPS dequant tables computed yet" */
2125 h->dequant_coeff_pps= -1;
2126 s->unrestricted_mv=1;
2127 s->decode=1; //FIXME
2129 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* flat (value 16) scaling lists are the spec default */
2131 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2132 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets up the MpegEncContext, picks the output pixel
 * format, and primes the POC state so the first picture is handled correctly. */
2135 static av_cold int decode_init(AVCodecContext *avctx){
2136 H264Context *h= avctx->priv_data;
2137 MpegEncContext * const s = &h->s;
2139 MPV_decode_defaults(s);
2144 s->out_format = FMT_H264;
2145 s->workaround_bugs= avctx->workaround_bugs;
2148 // s->decode_mb= ff_h263_decode_mb;
2149 s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) chroma; H.264 proper uses MPEG range here */
2152 if(avctx->codec_id == CODEC_ID_SVQ3)
2153 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2155 avctx->pix_fmt= PIX_FMT_YUV420P;
/* a leading 1 byte in extradata signals AVCC-style (length-prefixed) NALs */
2159 if(avctx->extradata_size > 0 && avctx->extradata &&
2160 *(char *)avctx->extradata == 1){
2167 h->thread_context[0] = h;
/* sentinel values so the first real POC always compares as "newer" */
2168 h->outputed_poc = INT_MIN;
2169 h->prev_poc_msb= 1<<16;
/* Per-frame setup: starts the MPV frame, precomputes block offsets for the
 * current linesizes, allocates scratch buffers, and resets reference/POC
 * state on the new picture. */
2173 static int frame_start(H264Context *h){
2174 MpegEncContext * const s = &h->s;
2177 if(MPV_frame_start(s, s->avctx) < 0)
2179 ff_er_frame_start(s);
2181 * MPV_frame_start uses pict_type to derive key_frame.
2182 * This is incorrect for H.264; IDR markings must be used.
2183 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2184 * See decode_nal_units().
2186 s->current_picture_ptr->key_frame= 0;
2188 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-coded offsets; [24..47]: field-coded (doubled
 * vertical stride) offsets, derived from the scan8 layout */
2190 for(i=0; i<16; i++){
2191 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2192 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2195 h->block_offset[16+i]=
2196 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2197 h->block_offset[24+16+i]=
2198 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2201 /* can't be in alloc_tables because linesize isn't known there.
2202 * FIXME: redo bipred weight to not require extra buffer? */
2203 for(i = 0; i < s->avctx->thread_count; i++)
2204 if(!h->thread_context[i]->s.obmc_scratchpad)
2205 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2207 /* some macroblocks will be accessed before they're available */
2208 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2209 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2211 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2213 // We mark the current picture as non-reference after allocating it, so
2214 // that if we break out due to an error it can be released automatically
2215 // in the next MPV_frame_start().
2216 // SVQ3 as well as most other codecs have only last/next/current and thus
2217 // get released even with set reference, besides SVQ3 and others do not
2218 // mark frames as reference later "naturally".
2219 if(s->codec_id != CODEC_ID_SVQ3)
2220 s->current_picture_ptr->reference= 0;
/* INT_MAX sentinels: real field POCs are filled in when parsed */
2222 s->current_picture_ptr->field_poc[0]=
2223 s->current_picture_ptr->field_poc[1]= INT_MAX;
2224 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom/right edge pixels of the just-decoded macroblock into
 * h->top_borders / h->left_border so the deblocking filter and intra
 * prediction of neighboring MBs can still read the pre-filter values.
 * 'simple' skips the MBAFF and gray-only special cases. */
2229 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2230 MpegEncContext * const s = &h->s;
2239 src_cb -= uvlinesize;
2240 src_cr -= uvlinesize;
2242 if(!simple && FRAME_MBAFF){
/* MBAFF: top/bottom MB of a pair store into different border slots */
2244 offset = MB_MBAFF ? 1 : 17;
2245 uvoffset= MB_MBAFF ? 1 : 9;
2247 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2248 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
/* chroma is skipped when decoding luma-only (CODEC_FLAG_GRAY) */
2249 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2250 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2251 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2256 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2257 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2258 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2259 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2265 top_idx = MB_MBAFF ? 0 : 1;
2267 step= MB_MBAFF ? 2 : 1;
2270 // There are two lines saved, the line above the the top macroblock of a pair,
2271 // and the line above the bottom macroblock
2272 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2273 for(i=1; i<17 - skiplast; i++){
2274 h->left_border[offset+i*step]= src_y[15+i* linesize];
2277 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2278 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2280 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2281 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2282 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2283 for(i=1; i<9 - skiplast; i++){
2284 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2285 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2287 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2288 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Temporarily swap (xchg!=0) or copy back the saved neighbor border pixels
 * around the current MB so intra prediction sees unfiltered samples even
 * though deblocking already ran on the neighbors.  Called in pairs: once
 * before prediction (xchg=1) and once after (xchg=0). */
2292 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2293 MpegEncContext * const s = &h->s;
2304 if(!simple && FRAME_MBAFF){
2306 offset = MB_MBAFF ? 1 : 17;
2307 uvoffset= MB_MBAFF ? 1 : 9;
2311 top_idx = MB_MBAFF ? 0 : 1;
2313 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter only inside the slice, so only neighbors
 * in the same slice need their borders exchanged */
2316 if(h->deblocking_filter == 2) {
2318 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2319 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2321 deblock_left = (s->mb_x > 0);
2322 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back to the top-left neighbor pixel */
2325 src_y -= linesize + 1;
2326 src_cb -= uvlinesize + 1;
2327 src_cr -= uvlinesize + 1;
2329 #define XCHG(a,b,t,xchg)\
2336 for(i = !deblock_top; i<16; i++){
2337 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2339 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2343 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2344 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2345 if(s->mb_x+1 < s->mb_width){
2346 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2350 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2352 for(i = !deblock_top; i<8; i++){
2353 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2354 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2356 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2357 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2360 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2361 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock: intra prediction or motion compensation,
 * inverse transform, and deblocking preparation.  'simple' is a
 * compile-time flag (always_inline) that strips the uncommon paths
 * (MBAFF, interlacing, SVQ3, gray, PCM, lossless) to produce a fast
 * specialized version; see hl_decode_mb_simple/_complex. */
2366 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2367 MpegEncContext * const s = &h->s;
2368 const int mb_x= s->mb_x;
2369 const int mb_y= s->mb_y;
2370 const int mb_xy= h->mb_xy;
2371 const int mb_type= s->current_picture.mb_type[mb_xy];
2372 uint8_t *dest_y, *dest_cb, *dest_cr;
2373 int linesize, uvlinesize /*dct_offset*/;
2375 int *block_offset = &h->block_offset[0];
/* lossless mode: qscale==0 with SPS transform_bypass set */
2376 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2377 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2378 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2379 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2381 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2382 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2383 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2385 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2386 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides and use the field block offsets */
2388 if (!simple && MB_FIELD) {
2389 linesize = h->mb_linesize = s->linesize * 2;
2390 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2391 block_offset = &h->block_offset[24];
2392 if(mb_y&1){ //FIXME move out of this function?
2393 dest_y -= s->linesize*15;
2394 dest_cb-= s->uvlinesize*7;
2395 dest_cr-= s->uvlinesize*7;
/* rewrite ref_cache entries to even/odd field references for MBAFF */
2399 for(list=0; list<h->list_count; list++){
2400 if(!USES_LIST(mb_type, list))
2402 if(IS_16X16(mb_type)){
2403 int8_t *ref = &h->ref_cache[list][scan8[0]];
2404 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2406 for(i=0; i<16; i+=4){
2407 int ref = h->ref_cache[list][scan8[i]];
2409 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2415 linesize = h->mb_linesize = s->linesize;
2416 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2417 // dct_offset = s->linesize * 16;
/* IPCM: raw samples were stored in h->mb by the parser; just copy out */
2420 if (!simple && IS_INTRA_PCM(mb_type)) {
2421 for (i=0; i<16; i++) {
2422 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2424 for (i=0; i<8; i++) {
2425 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2426 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2429 if(IS_INTRA(mb_type)){
/* give intra prediction unfiltered neighbor samples */
2430 if(h->deblocking_filter)
2431 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2433 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2434 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2435 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2438 if(IS_INTRA4x4(mb_type)){
2439 if(simple || !s->encoding){
2440 if(IS_8x8DCT(mb_type)){
2441 if(transform_bypass){
2443 idct_add = s->dsp.add_pixels8;
2445 idct_dc_add = s->dsp.h264_idct8_dc_add;
2446 idct_add = s->dsp.h264_idct8_add;
2448 for(i=0; i<16; i+=4){
2449 uint8_t * const ptr= dest_y + block_offset[i];
2450 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) lossless: prediction folded into the add */
2451 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2452 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2454 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2455 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2456 (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only blocks take the cheaper dc_add path */
2458 if(nnz == 1 && h->mb[i*16])
2459 idct_dc_add(ptr, h->mb + i*16, linesize);
2461 idct_add (ptr, h->mb + i*16, linesize);
2466 if(transform_bypass){
2468 idct_add = s->dsp.add_pixels4;
2470 idct_dc_add = s->dsp.h264_idct_dc_add;
2471 idct_add = s->dsp.h264_idct_add;
2473 for(i=0; i<16; i++){
2474 uint8_t * const ptr= dest_y + block_offset[i];
2475 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2477 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2478 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* synthesize the top-right samples when unavailable by replicating
 * the rightmost available top pixel */
2482 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2483 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2484 assert(mb_y || linesize <= block_offset[i]);
2485 if(!topright_avail){
2486 tr= ptr[3 - linesize]*0x01010101;
2487 topright= (uint8_t*) &tr;
2489 topright= ptr + 4 - linesize;
2493 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2494 nnz = h->non_zero_count_cache[ scan8[i] ];
2497 if(nnz == 1 && h->mb[i*16])
2498 idct_dc_add(ptr, h->mb + i*16, linesize);
2500 idct_add (ptr, h->mb + i*16, linesize);
2502 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2509 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2511 if(!transform_bypass)
2512 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2514 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2516 if(h->deblocking_filter)
2517 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation with optional weighting */
2519 hl_motion(h, dest_y, dest_cb, dest_cr,
2520 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2521 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2522 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the residual (luma) */
2526 if(!IS_INTRA4x4(mb_type)){
2528 if(IS_INTRA16x16(mb_type)){
2529 if(transform_bypass){
2530 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2531 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2533 for(i=0; i<16; i++){
2534 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2535 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2539 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2541 }else if(h->cbp&15){
2542 if(transform_bypass){
2543 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2544 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2545 for(i=0; i<16; i+=di){
2546 if(h->non_zero_count_cache[ scan8[i] ]){
2547 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2551 if(IS_8x8DCT(mb_type)){
2552 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2554 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2559 for(i=0; i<16; i++){
2560 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2561 uint8_t * const ptr= dest_y + block_offset[i];
2562 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5), skipped in gray-only decoding */
2568 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2569 uint8_t *dest[2] = {dest_cb, dest_cr};
2570 if(transform_bypass){
2571 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2572 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2573 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2575 idct_add = s->dsp.add_pixels4;
2576 for(i=16; i<16+8; i++){
2577 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2578 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2582 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2583 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2585 idct_add = s->dsp.h264_idct_add;
2586 idct_dc_add = s->dsp.h264_idct_dc_add;
2587 for(i=16; i<16+8; i++){
2588 if(h->non_zero_count_cache[ scan8[i] ])
2589 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2590 else if(h->mb[i*16])
2591 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2594 for(i=16; i<16+8; i++){
2595 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2596 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2597 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2604 if(h->cbp || IS_INTRA(mb_type))
2605 s->dsp.clear_blocks(h->mb);
/* deblock: save borders first, then run the (fast) per-MB filter */
2607 if(h->deblocking_filter) {
2608 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2609 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2610 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2611 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2612 if (!simple && FRAME_MBAFF) {
2613 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2615 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2621 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2623 static void hl_decode_mb_simple(H264Context *h){
2624 hl_decode_mb_internal(h, 1);
2628 * Process a macroblock; this handles edge cases, such as interlacing.
2630 static void av_noinline hl_decode_mb_complex(H264Context *h){
2631 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the fast 'simple' path or the
 * full 'complex' path depending on stream features (MBAFF/interlace,
 * IPCM, lossless qscale==0, or small-build configuration). */
2634 static void hl_decode_mb(H264Context *h){
2635 MpegEncContext * const s = &h->s;
2636 const int mb_xy= h->mb_xy;
2637 const int mb_type= s->current_picture.mb_type[mb_xy];
2638 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* encoder path reconstructs elsewhere; nothing to do here */
2640 if(ENABLE_H264_ENCODER && !s->decode)
2644 hl_decode_mb_complex(h);
2645 else hl_decode_mb_simple(h);
/* Convert a frame Picture in place into one of its fields: offset the data
 * pointers for the bottom field, double the linesizes so rows step over the
 * opposite field, and set reference/poc for the given parity. */
2648 static void pic_as_field(Picture *pic, const int parity){
2650 for (i = 0; i < 4; ++i) {
2651 if (parity == PICT_BOTTOM_FIELD)
2652 pic->data[i] += pic->linesize[i];
/* (re)assigned each iteration -- redundant but harmless */
2653 pic->reference = parity;
2654 pic->linesize[i] *= 2;
2656 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy 'src' into 'dest' if it is a reference of the requested parity,
 * converting it to a field picture when parity selects a single field.
 * Returns non-zero ('match') when the copy happened. */
2659 static int split_field_copy(Picture *dest, Picture *src,
2660 int parity, int id_add){
2661 int match = !!(src->reference & parity);
2665 if(parity != PICT_FRAME){
2666 pic_as_field(dest, parity);
/* fields of the same frame get distinct pic_ids via id_add */
2668 dest->pic_id += id_add;
/* Build a default reference list for field decoding: alternate between
 * same-parity (sel) and opposite-parity (sel^3) references, assigning
 * pic_ids (long-term index or frame_num).  Returns the number of entries
 * written to 'def'. */
2675 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0] scans for same-parity refs, i[1] for opposite-parity refs */
2679 while(i[0]<len || i[1]<len){
2680 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2682 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2685 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2686 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2689 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2690 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort the pictures in 'src' by POC into 'sorted', taking only
 * those on one side of 'limit': dir==0 collects POCs > limit in ascending
 * order, dir!=0 collects POCs <= limit in descending order.  Returns the
 * number of pictures emitted. */
2697 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2702 best_poc= dir ? INT_MIN : INT_MAX;
2704 for(i=0; i<len; i++){
2705 const int poc= src[i]->poc;
/* the ^dir trick flips both comparisons for the descending pass */
2706 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2708 sorted[out_i]= src[i];
/* sentinel unchanged -> nothing left on this side of 'limit' */
2711 if(best_poc == (dir ? INT_MIN : INT_MAX))
2713 limit= sorted[out_i++]->poc - dir;
2719 * fills the default_ref_list.
2721 static int fill_default_ref_list(H264Context *h){
2722 MpegEncContext * const s = &h->s;
/* B slices: list0 = past refs (descending POC) then future (ascending);
 * list1 is the mirror image */
2725 if(h->slice_type_nos==FF_B_TYPE){
2726 Picture *sorted[32];
2731 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2733 cur_poc= s->current_picture_ptr->poc;
2735 for(list= 0; list<2; list++){
2736 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2737 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2739 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2740 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2743 if(len < h->ref_count[list])
2744 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if both lists are identical and long enough, swap the first two
 * entries of list1 so the lists differ */
2748 if(lens[0] == lens[1] && lens[1] > 1){
2749 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2751 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P slices: short-term refs first, then long-term */
2754 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2755 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2757 if(len < h->ref_count[0])
2758 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2761 for (i=0; i<h->ref_count[0]; i++) {
2762 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2764 if(h->slice_type_nos==FF_B_TYPE){
2765 for (i=0; i<h->ref_count[1]; i++) {
2766 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2773 static void print_short_term(H264Context *h);
2774 static void print_long_term(H264Context *h);
2777 * Extract structure information about the picture described by pic_num in
2778 * the current decoding context (frame or field). Note that pic_num is
2779 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2780 * @param pic_num picture number for which to extract structure information
2781 * @param structure one of PICT_XXX describing structure of picture
2783 * @return frame number (short term) or long term index of picture
2784 * described by pic_num
2786 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2787 MpegEncContext * const s = &h->s;
2789 *structure = s->picture_structure;
2792 /* opposite field */
2793 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and rearrange
 * h->ref_list accordingly (H.264 spec 7.3.3.1 / 8.2.4.3).  Starts from the
 * default lists and, per reordering command, pulls the addressed short- or
 * long-term picture to the current index.  Returns 0 on success, -1 on a
 * malformed bitstream. */
2800 static int decode_ref_pic_list_reordering(H264Context *h){
2801 MpegEncContext * const s = &h->s;
2802 int list, index, pic_structure;
2804 print_short_term(h);
2807 for(list=0; list<h->list_count; list++){
2808 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_lX */
2810 if(get_bits1(&s->gb)){
2811 int pred= h->curr_pic_num;
2813 for(index=0; ; index++){
2814 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2815 unsigned int pic_id;
2817 Picture *ref = NULL;
/* idc==3 terminates the reordering loop */
2819 if(reordering_of_pic_nums_idc==3)
2822 if(index >= h->ref_count[list]){
2823 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2827 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term ref addressed by a picture-number delta */
2828 if(reordering_of_pic_nums_idc<2){
2829 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2832 if(abs_diff_pic_num > h->max_pic_num){
2833 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2837 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2838 else pred+= abs_diff_pic_num;
/* modular arithmetic; max_pic_num is a power of two */
2839 pred &= h->max_pic_num - 1;
2841 frame_num = pic_num_extract(h, pred, &pic_structure);
2843 for(i= h->short_ref_count-1; i>=0; i--){
2844 ref = h->short_ref[i];
2845 assert(ref->reference);
2846 assert(!ref->long_ref);
2848 ref->frame_num == frame_num &&
2849 (ref->reference & pic_structure)
/* idc 2: long-term ref addressed by long_term_pic_idx */
2857 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2859 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2862 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2865 ref = h->long_ref[long_idx];
2866 assert(!(ref && !ref->reference));
2867 if(ref && (ref->reference & pic_structure)){
2868 ref->pic_id= pic_id;
2869 assert(ref->long_ref);
2877 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2878 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift entries down to make room at 'index', then insert 'ref' */
2880 for(i=index; i+1<h->ref_count[list]; i++){
2881 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2884 for(; i > index; i--){
2885 h->ref_list[list][i]= h->ref_list[list][i-1];
2887 h->ref_list[list][index]= *ref;
2889 pic_as_field(&h->ref_list[list][index], pic_structure);
2893 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* any hole left in the lists is fatal for decoding; patch with the
 * current picture as a last resort */
2899 for(list=0; list<h->list_count; list++){
2900 for(index= 0; index < h->ref_count[list]; index++){
2901 if(!h->ref_list[list][index].data[0]){
2902 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2903 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries: for each frame in
 * ref_list[list][i], entries [16+2*i] (top) and [16+2*i+1] (bottom) are
 * created as field views of it, and the prediction weights are duplicated
 * for both fields. */
2911 static void fill_mbaff_ref_list(H264Context *h){
2913 for(list=0; list<2; list++){ //FIXME try list_count
2914 for(i=0; i<h->ref_count[list]; i++){
2915 Picture *frame = &h->ref_list[list][i];
2916 Picture *field = &h->ref_list[list][16+2*i];
/* field view: doubled stride; bottom field starts one line down */
2919 field[0].linesize[j] <<= 1;
2920 field[0].reference = PICT_TOP_FIELD;
2921 field[0].poc= field[0].field_poc[0];
2922 field[1] = field[0];
2924 field[1].data[j] += frame->linesize[j];
2925 field[1].reference = PICT_BOTTOM_FIELD;
2926 field[1].poc= field[1].field_poc[1];
2928 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2929 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2931 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2932 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed by (ref1, ref0); replicate both axes */
2936 for(j=0; j<h->ref_count[1]; j++){
2937 for(i=0; i<h->ref_count[0]; i++)
2938 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2939 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2940 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header (explicit weighted
 * prediction, H.264 spec 7.3.3.2): per-reference luma/chroma weights and
 * offsets, with defaults (1<<denom, offset 0) when the per-ref flag is
 * absent.  Sets h->use_weight/use_weight_chroma when any weight is
 * non-default. */
2944 static int pred_weight_table(H264Context *h){
2945 MpegEncContext * const s = &h->s;
2947 int luma_def, chroma_def;
2950 h->use_weight_chroma= 0;
2951 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2952 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2953 luma_def = 1<<h->luma_log2_weight_denom;
2954 chroma_def = 1<<h->chroma_log2_weight_denom;
2956 for(list=0; list<2; list++){
2957 for(i=0; i<h->ref_count[list]; i++){
2958 int luma_weight_flag, chroma_weight_flag;
2960 luma_weight_flag= get_bits1(&s->gb);
2961 if(luma_weight_flag){
2962 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2963 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2964 if( h->luma_weight[list][i] != luma_def
2965 || h->luma_offset[list][i] != 0)
2968 h->luma_weight[list][i]= luma_def;
2969 h->luma_offset[list][i]= 0;
2973 chroma_weight_flag= get_bits1(&s->gb);
2974 if(chroma_weight_flag){
2977 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2978 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2979 if( h->chroma_weight[list][i][j] != chroma_def
2980 || h->chroma_offset[list][i][j] != 0)
2981 h->use_weight_chroma= 1;
2986 h->chroma_weight[list][i][j]= chroma_def;
2987 h->chroma_offset[list][i][j]= 0;
/* list1 weights only exist for B slices */
2992 if(h->slice_type_nos != FF_B_TYPE) break;
2994 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute implicit bi-prediction weights from POC distances (H.264 spec
 * 8.4.2.3.2): weight = 64 - dist_scale_factor, clamped to a plain average
 * (32/32) when the scale factor leaves the legal range or td==0. */
2998 static void implicit_weight_table(H264Context *h){
2999 MpegEncContext * const s = &h->s;
3001 int cur_poc = s->current_picture_ptr->poc;
/* trivial case: single ref each side, equidistant -> no weighting */
3003 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3004 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3006 h->use_weight_chroma= 0;
3011 h->use_weight_chroma= 2;
3012 h->luma_log2_weight_denom= 5;
3013 h->chroma_log2_weight_denom= 5;
3015 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3016 int poc0 = h->ref_list[0][ref0].poc;
3017 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3018 int poc1 = h->ref_list[1][ref1].poc;
3019 int td = av_clip(poc1 - poc0, -128, 127);
3021 int tb = av_clip(cur_poc - poc0, -128, 127);
3022 int tx = (16384 + (FFABS(td) >> 1)) / td;
3023 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3024 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3025 h->implicit_weight[ref0][ref1] = 32;
3027 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3029 h->implicit_weight[ref0][ref1] = 32;
3035 * Mark a picture as no longer needed for reference. The refmask
3036 * argument allows unreferencing of individual fields or the whole frame.
3037 * If the picture becomes entirely unreferenced, but is being held for
3038 * display purposes, it is marked as such.
3039 * @param refmask mask of fields to unreference; the mask is bitwise
3040 * anded with the reference marking of pic
3041 * @return non-zero if pic becomes entirely unreferenced (except possibly
3042 * for display purposes) zero if one of the fields remains in
3045 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* NOTE(review): extraction gaps — body lines are missing below. */
/* If any field bit survives the mask, the picture is still a reference. */
3047 if (pic->reference &= refmask) {
/* Fully unreferenced: if it is still queued for output, keep it alive
 * with the DELAYED_PIC_REF marker so the display path does not lose it. */
3050 for(i = 0; h->delayed_pic[i]; i++)
3051 if(pic == h->delayed_pic[i]){
3052 pic->reference=DELAYED_PIC_REF;
3060 * instantaneous decoder refresh.
3062 static void idr(H264Context *h){
/* IDR: drop all long-term and short-term references and reset the
 * frame-number state, per the H.264 IDR semantics.
 * NOTE(review): extraction gaps — some original lines are missing. */
/* Remove every long-term reference slot (0..15). */
3065 for(i=0; i<16; i++){
3066 remove_long(h, i, 0);
3068 assert(h->long_ref_count==0);
/* Unreference and clear all short-term references. */
3070 for(i=0; i<h->short_ref_count; i++){
3071 unreference_pic(h, h->short_ref[i], 0);
3072 h->short_ref[i]= NULL;
3074 h->short_ref_count=0;
3075 h->prev_frame_num= 0;
3076 h->prev_frame_num_offset= 0;
3081 /* forget old pics after a seek */
3082 static void flush_dpb(AVCodecContext *avctx){
/* Drops all delayed-output pictures and the current picture's reference
 * marking, then flushes the underlying MPEG context. Called on seek.
 * NOTE(review): extraction gaps — some original lines are missing. */
3083 H264Context *h= avctx->priv_data;
3085 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3086 if(h->delayed_pic[i])
3087 h->delayed_pic[i]->reference= 0;
3088 h->delayed_pic[i]= NULL;
/* INT_MIN marks "no picture output yet" for the POC output ordering. */
3090 h->outputed_poc= INT_MIN;
3092 if(h->s.current_picture_ptr)
3093 h->s.current_picture_ptr->reference= 0;
3094 h->s.first_field= 0;
3095 ff_mpeg_flush(avctx);
3099 * Find a Picture in the short term reference list by frame number.
3100 * @param frame_num frame number to search for
3101 * @param idx the index into h->short_ref where returned picture is found
3102 * undefined if no picture found.
3103 * @return pointer to the found picture, or NULL if no pic with the provided
3104 * frame number is found
3106 static Picture * find_short(H264Context *h, int frame_num, int *idx){
/* NOTE(review): extraction gaps — the match/return lines are missing. */
3107 MpegEncContext * const s = &h->s;
/* Linear scan of the short-term list; fine since it holds at most 16-32
 * entries. */
3110 for(i=0; i<h->short_ref_count; i++){
3111 Picture *pic= h->short_ref[i];
3112 if(s->avctx->debug&FF_DEBUG_MMCO)
3113 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3114 if(pic->frame_num == frame_num) {
3123 * Remove a picture from the short term reference list by its index in
3124 * that list. This does no checking on the provided index; it is assumed
3125 * to be valid. Other list entries are shifted down.
3126 * @param i index into h->short_ref of picture to remove.
3128 static void remove_short_at_index(H264Context *h, int i){
3129 assert(i >= 0 && i < h->short_ref_count);
3130 h->short_ref[i]= NULL;
/* Close the gap: shift the remaining pointers down by one slot. */
3131 if (--h->short_ref_count)
3132 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3137 * @return the removed picture or NULL if an error occurs
3139 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
/* Removes the short-term reference with the given frame_num; only drops
 * it from the list if unreference_pic() says it became fully unreferenced
 * (ref_mask allows removing a single field).
 * NOTE(review): extraction gaps — some original lines are missing. */
3140 MpegEncContext * const s = &h->s;
3144 if(s->avctx->debug&FF_DEBUG_MMCO)
3145 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3147 pic = find_short(h, frame_num, &i);
3149 if(unreference_pic(h, pic, ref_mask))
3150 remove_short_at_index(h, i);
3157 * Remove a picture from the long term reference list by its index in
3159 * @return the removed picture or NULL if an error occurs
3161 static Picture * remove_long(H264Context *h, int i, int ref_mask){
/* Clears long-term slot i if the picture becomes fully unreferenced under
 * ref_mask, keeping long_ref_count consistent.
 * NOTE(review): extraction gaps — some original lines are missing. */
3164 pic= h->long_ref[i];
3166 if(unreference_pic(h, pic, ref_mask)){
3167 assert(h->long_ref[i]->long_ref == 1);
3168 h->long_ref[i]->long_ref= 0;
3169 h->long_ref[i]= NULL;
3170 h->long_ref_count--;
3178 * print short term list
3180 static void print_short_term(H264Context *h) {
/* Debug-only dump of the short-term reference list (guarded by
 * FF_DEBUG_MMCO). */
3182 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3183 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3184 for(i=0; i<h->short_ref_count; i++){
3185 Picture *pic= h->short_ref[i];
3186 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3192 * print long term list
3194 static void print_long_term(H264Context *h) {
/* Debug-only dump of the 16 long-term reference slots (guarded by
 * FF_DEBUG_MMCO). NOTE(review): the NULL-slot check around line 3200
 * is missing from this extraction. */
3196 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3197 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3198 for(i = 0; i < 16; i++){
3199 Picture *pic= h->long_ref[i];
3201 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3208 * Executes the reference picture marking (memory management control operations).
3210 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
/* Applies the decoded MMCO list to the DPB reference bookkeeping
 * (short_ref/long_ref). See H.264 spec clause 8.2.5.
 * NOTE(review): extraction gaps — break statements, some branches and
 * braces are missing between the numbered lines below. */
3211 MpegEncContext * const s = &h->s;
3213 int current_ref_assigned=0;
3216 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3217 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3219 for(i=0; i<mmco_count; i++){
3220 int structure, frame_num;
3221 if(s->avctx->debug&FF_DEBUG_MMCO)
3222 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* Both short-picture opcodes need the target picture resolved up front. */
3224 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3225 || mmco[i].opcode == MMCO_SHORT2LONG){
3226 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3227 pic = find_short(h, frame_num, &j);
3229 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3230 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3231 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3236 switch(mmco[i].opcode){
3237 case MMCO_SHORT2UNUSED:
3238 if(s->avctx->debug&FF_DEBUG_MMCO)
3239 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the complementary field/frame part. */
3240 remove_short(h, frame_num, structure ^ PICT_FRAME);
3242 case MMCO_SHORT2LONG:
/* Evict any different picture occupying the target long-term slot first. */
3243 if (h->long_ref[mmco[i].long_arg] != pic)
3244 remove_long(h, mmco[i].long_arg, 0);
3246 remove_short_at_index(h, j);
3247 h->long_ref[ mmco[i].long_arg ]= pic;
3248 if (h->long_ref[ mmco[i].long_arg ]){
3249 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3250 h->long_ref_count++;
3253 case MMCO_LONG2UNUSED:
3254 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3255 pic = h->long_ref[j];
3257 remove_long(h, j, structure ^ PICT_FRAME);
3258 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3262 // Comment below left from previous code as it is an interesting note.
3263 /* First field in pair is in short term list or
3264 * at a different long term index.
3265 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3266 * Report the problem and keep the pair where it is,
3267 * and mark this field valid.
/* MMCO_LONG (current picture -> long term): replace the slot holder
 * unless the current picture is already there. */
3270 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3271 remove_long(h, mmco[i].long_arg, 0);
3273 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3274 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3275 h->long_ref_count++;
3278 s->current_picture_ptr->reference |= s->picture_structure;
3279 current_ref_assigned=1;
3281 case MMCO_SET_MAX_LONG:
3282 assert(mmco[i].long_arg <= 16);
3283 // just remove the long term which index is greater than new max
3284 for(j = mmco[i].long_arg; j<16; j++){
3285 remove_long(h, j, 0);
/* (MMCO_RESET branch) empty the whole DPB reference state. */
3289 while(h->short_ref_count){
3290 remove_short(h, h->short_ref[0]->frame_num, 0);
3292 for(j = 0; j < 16; j++) {
3293 remove_long(h, j, 0);
3295 s->current_picture_ptr->poc=
3296 s->current_picture_ptr->field_poc[0]=
3297 s->current_picture_ptr->field_poc[1]=
3301 s->current_picture_ptr->frame_num= 0;
/* If no MMCO explicitly marked the current picture, insert it as the
 * newest short-term reference (default sliding-window behavior). */
3307 if (!current_ref_assigned) {
3308 /* Second field of complementary field pair; the first field of
3309 * which is already referenced. If short referenced, it
3310 * should be first entry in short_ref. If not, it must exist
3311 * in long_ref; trying to put it on the short list here is an
3312 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3314 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3315 /* Just mark the second field valid */
3316 s->current_picture_ptr->reference = PICT_FRAME;
3317 } else if (s->current_picture_ptr->long_ref) {
3318 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3319 "assignment for second field "
3320 "in complementary field pair "
3321 "(first field is long term)\n");
3323 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3325 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* Prepend the current picture to short_ref (newest first). */
3328 if(h->short_ref_count)
3329 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3331 h->short_ref[0]= s->current_picture_ptr;
3332 h->short_ref_count++;
3333 s->current_picture_ptr->reference |= s->picture_structure;
3337 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3339 /* We have too many reference frames, probably due to corrupted
3340 * stream. Need to discard one frame. Prevents overrun of the
3341 * short_ref and long_ref buffers.
3343 av_log(h->s.avctx, AV_LOG_ERROR,
3344 "number of reference frames exceeds max (probably "
3345 "corrupt input), discarding one\n");
3347 if (h->long_ref_count && !h->short_ref_count) {
3348 for (i = 0; i < 16; ++i)
3353 remove_long(h, i, 0);
/* Otherwise discard the oldest short-term reference (last in the list). */
3355 pic = h->short_ref[h->short_ref_count - 1];
3356 remove_short(h, pic->frame_num, 0);
3360 print_short_term(h);
3365 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
/* Parses the dec_ref_pic_marking() syntax from the slice header into
 * h->mmco[]; for non-adaptive marking, synthesizes the sliding-window
 * MMCO removing the oldest short-term reference.
 * NOTE(review): extraction gaps — some original lines are missing. */
3366 MpegEncContext * const s = &h->s;
3370 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: get_bits1() - 1 maps 1 -> 0, 0 -> -1. */
3371 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag set: mark the IDR picture long-term. */
3373 h->mmco[0].opcode= MMCO_LONG;
3374 h->mmco[0].long_arg= 0;
3378 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3379 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3380 MMCOOpcode opcode= get_ue_golomb(gb);
3382 h->mmco[i].opcode= opcode;
3383 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> wrapped absolute short pic num. */
3384 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3385 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3386 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3390 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3391 unsigned int long_arg= get_ue_golomb(gb);
/* Field pictures use doubled indices, hence the extra LONG2UNUSED slack. */
3392 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3393 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3396 h->mmco[i].long_arg= long_arg;
3399 if(opcode > (unsigned)MMCO_LONG){
3400 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3403 if(opcode == MMCO_END)
3408 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* Sliding window: when the DPB is full, drop the oldest short-term ref
 * (unless this is the second field of an already-referenced pair). */
3410 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3411 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3412 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3413 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3415 if (FIELD_PICTURE) {
/* Field coding: remove both fields of the oldest frame. */
3416 h->mmco[0].short_pic_num *= 2;
3417 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3418 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3428 static int init_poc(H264Context *h){
/* Derives the picture order count (POC) for the current picture using
 * one of the three POC types of H.264 clause 8.2.1, and stores the
 * per-field and frame POC values on the current picture.
 * NOTE(review): extraction gaps — some original lines are missing. */
3429 MpegEncContext * const s = &h->s;
3430 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3432 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> advance the running offset by max_frame_num. */
3434 h->frame_num_offset= h->prev_frame_num_offset;
3435 if(h->frame_num < h->prev_frame_num)
3436 h->frame_num_offset += max_frame_num;
3438 if(h->sps.poc_type==0){
/* Type 0: explicit poc_lsb in the bitstream; infer the MSB from wrap
 * direction relative to the previous picture's lsb. */
3439 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3441 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3442 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3443 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3444 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3446 h->poc_msb = h->prev_poc_msb;
3447 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3449 field_poc[1] = h->poc_msb + h->poc_lsb;
3450 if(s->picture_structure == PICT_FRAME)
3451 field_poc[1] += h->delta_poc_bottom;
3452 }else if(h->sps.poc_type==1){
/* Type 1: POC derived from frame_num and the SPS offset cycle. */
3453 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3456 if(h->sps.poc_cycle_length != 0)
3457 abs_frame_num = h->frame_num_offset + h->frame_num;
3461 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3464 expected_delta_per_poc_cycle = 0;
3465 for(i=0; i < h->sps.poc_cycle_length; i++)
3466 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3468 if(abs_frame_num > 0){
3469 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3470 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3472 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3473 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3474 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3478 if(h->nal_ref_idc == 0)
3479 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3481 field_poc[0] = expectedpoc + h->delta_poc[0];
3482 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3484 if(s->picture_structure == PICT_FRAME)
3485 field_poc[1] += h->delta_poc[1];
/* Type 2 (else branch): POC is simply twice the extended frame number. */
3487 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Only write the field POC(s) that this picture actually carries. */
3496 if(s->picture_structure != PICT_BOTTOM_FIELD)
3497 s->current_picture_ptr->field_poc[0]= field_poc[0];
3498 if(s->picture_structure != PICT_TOP_FIELD)
3499 s->current_picture_ptr->field_poc[1]= field_poc[1];
3500 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3507 * initialize scan tables
3509 static void init_scan_tables(H264Context *h){
/* Builds the per-context zigzag/field scan tables. When the IDCT in use
 * is not the plain C one, coefficients are stored permuted, so the scan
 * tables are permuted to match (the T() macros below).
 * NOTE(review): extraction gaps — some original lines are missing. */
3510 MpegEncContext * const s = &h->s;
3512 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3513 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3514 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3516 for(i=0; i<16; i++){
/* 4x4 permutation: swap the two 2-bit halves of each scan index. */
3517 #define T(x) (x>>2) | ((x<<2) & 0xF)
3518 h->zigzag_scan[i] = T(zigzag_scan[i]);
3519 h-> field_scan[i] = T( field_scan[i]);
3523 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3524 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3525 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3526 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3527 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3529 for(i=0; i<64; i++){
/* 8x8 permutation: swap the two 3-bit halves of each scan index. */
3530 #define T(x) (x>>3) | ((x&7)<<3)
3531 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3532 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3533 h->field_scan8x8[i] = T(field_scan8x8[i]);
3534 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* The _q0 tables are used for lossless (transform-bypass) blocks: they
 * must stay unpermuted, so point them at the static originals. */
3538 if(h->sps.transform_bypass){ //FIXME same ugly
3539 h->zigzag_scan_q0 = zigzag_scan;
3540 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3541 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3542 h->field_scan_q0 = field_scan;
3543 h->field_scan8x8_q0 = field_scan8x8;
3544 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3546 h->zigzag_scan_q0 = h->zigzag_scan;
3547 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3548 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3549 h->field_scan_q0 = h->field_scan;
3550 h->field_scan8x8_q0 = h->field_scan8x8;
3551 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3556 * Replicates H264 "master" context to thread contexts.
3558 static void clone_slice(H264Context *dst, H264Context *src)
/* Shallow-copies the per-frame state a worker thread needs: current
 * picture pointers, POC/frame_num history, reference lists, and the
 * dequant tables. Pointers are shared, not deep-copied. */
3560 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3561 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3562 dst->s.current_picture = src->s.current_picture;
3563 dst->s.linesize = src->s.linesize;
3564 dst->s.uvlinesize = src->s.uvlinesize;
3565 dst->s.first_field = src->s.first_field;
3567 dst->prev_poc_msb = src->prev_poc_msb;
3568 dst->prev_poc_lsb = src->prev_poc_lsb;
3569 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3570 dst->prev_frame_num = src->prev_frame_num;
3571 dst->short_ref_count = src->short_ref_count;
3573 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3574 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3575 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3576 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3578 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3579 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3583 * decodes a slice header.
3584 * This will also call MPV_common_init() and frame_start() as needed.
3586 * @param h h264context
3587 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3589 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3591 static int decode_slice_header(H264Context *h, H264Context *h0){
/* NOTE(review): extraction gaps — many original lines (braces, error
 * returns, else branches) are missing between the numbered lines. */
3592 MpegEncContext * const s = &h->s;
3593 MpegEncContext * const s0 = &h0->s;
3594 unsigned int first_mb_in_slice;
3595 unsigned int pps_id;
3596 int num_ref_idx_active_override_flag;
3597 unsigned int slice_type, tmp, i, j;
3598 int default_ref_list_done = 0;
3599 int last_pic_structure;
/* Non-reference slices can be dropped without corrupting later frames. */
3601 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: cheaper 2-tap qpel interpolation for non-reference frames. */
3603 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3604 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3605 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3607 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3608 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3611 first_mb_in_slice= get_ue_golomb(&s->gb);
3613 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3614 h0->current_slice = 0;
3615 if (!s0->first_field)
3616 s->current_picture_ptr= NULL;
3619 slice_type= get_ue_golomb(&s->gb);
3621 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type > 4 means "fixed": same type for all slices of the picture. */
3626 h->slice_type_fixed=1;
3628 h->slice_type_fixed=0;
3630 slice_type= golomb_to_pict_type[ slice_type ];
/* The default ref list only needs rebuilding when the slice type changed. */
3631 if (slice_type == FF_I_TYPE
3632 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3633 default_ref_list_done = 1;
3635 h->slice_type= slice_type;
3636 h->slice_type_nos= slice_type & 3;
3638 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3639 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3640 av_log(h->s.avctx, AV_LOG_ERROR,
3641 "B picture before any references, skipping\n");
3645 pps_id= get_ue_golomb(&s->gb);
3646 if(pps_id>=MAX_PPS_COUNT){
3647 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3650 if(!h0->pps_buffers[pps_id]) {
3651 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3654 h->pps= *h0->pps_buffers[pps_id];
3656 if(!h0->sps_buffers[h->pps.sps_id]) {
3657 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3660 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* Dequant tables depend on the PPS; rebuild only on the master context. */
3662 if(h == h0 && h->dequant_coeff_pps != pps_id){
3663 h->dequant_coeff_pps = pps_id;
3664 init_dequant_tables(h);
3667 s->mb_width= h->sps.mb_width;
3668 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3670 h->b_stride= s->mb_width*4;
3671 h->b8_stride= s->mb_width*2;
/* Apply SPS cropping to derive the displayed dimensions. */
3673 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3674 if(h->sps.frame_mbs_only_flag)
3675 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3677 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3679 if (s->context_initialized
3680 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3682 return -1; // width / height changed during parallelized decoding
3684 flush_dpb(s->avctx);
3687 if (!s->context_initialized) {
3689 return -1; // we cant (re-)initialize context during parallel decoding
3690 if (MPV_common_init(s) < 0)
3694 init_scan_tables(h);
/* Allocate and initialize one H264Context per extra decoding thread,
 * sharing the MpegEncContext part from the master's thread contexts. */
3697 for(i = 1; i < s->avctx->thread_count; i++) {
3699 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3700 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3701 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3704 init_scan_tables(c);
3708 for(i = 0; i < s->avctx->thread_count; i++)
3709 if(context_init(h->thread_context[i]) < 0)
3712 s->avctx->width = s->width;
3713 s->avctx->height = s->height;
3714 s->avctx->sample_aspect_ratio= h->sps.sar;
3715 if(!s->avctx->sample_aspect_ratio.den)
3716 s->avctx->sample_aspect_ratio.den = 1;
3718 if(h->sps.timing_info_present_flag){
3719 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Workaround: old x264 builds wrote a wrong (halved) time_scale. */
3720 if(h->x264_build > 0 && h->x264_build < 44)
3721 s->avctx->time_base.den *= 2;
3722 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3723 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3727 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3730 h->mb_aff_frame = 0;
3731 last_pic_structure = s0->picture_structure;
3732 if(h->sps.frame_mbs_only_flag){
3733 s->picture_structure= PICT_FRAME;
3735 if(get_bits1(&s->gb)) { //field_pic_flag
3736 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3738 s->picture_structure= PICT_FRAME;
3739 h->mb_aff_frame = h->sps.mb_aff;
3742 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3744 if(h0->current_slice == 0){
/* Conceal frame_num gaps by synthesizing skipped reference frames so the
 * reference bookkeeping stays consistent. */
3745 while(h->frame_num != h->prev_frame_num &&
3746 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3747 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3749 h->prev_frame_num++;
3750 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3751 s->current_picture_ptr->frame_num= h->prev_frame_num;
3752 execute_ref_pic_marking(h, NULL, 0);
3755 /* See if we have a decoded first field looking for a pair... */
3756 if (s0->first_field) {
3757 assert(s0->current_picture_ptr);
3758 assert(s0->current_picture_ptr->data[0]);
3759 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3761 /* figure out if we have a complementary field pair */
3762 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3764 * Previous field is unmatched. Don't display it, but let it
3765 * remain for reference if marked as such.
3767 s0->current_picture_ptr = NULL;
3768 s0->first_field = FIELD_PICTURE;
3771 if (h->nal_ref_idc &&
3772 s0->current_picture_ptr->reference &&
3773 s0->current_picture_ptr->frame_num != h->frame_num) {
3775 * This and previous field were reference, but had
3776 * different frame_nums. Consider this field first in
3777 * pair. Throw away previous field except for reference
3780 s0->first_field = 1;
3781 s0->current_picture_ptr = NULL;
3784 /* Second field in complementary pair */
3785 s0->first_field = 0;
3790 /* Frame or first field in a potentially complementary pair */
3791 assert(!s0->current_picture_ptr);
3792 s0->first_field = FIELD_PICTURE;
3795 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3796 s0->first_field = 0;
3803 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3805 assert(s->mb_num == s->mb_width * s->mb_height);
3806 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3807 first_mb_in_slice >= s->mb_num){
3808 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3811 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3812 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3813 if (s->picture_structure == PICT_BOTTOM_FIELD)
3814 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3815 assert(s->mb_y < s->mb_height);
/* Field pictures use doubled pic numbers (one per field). */
3817 if(s->picture_structure==PICT_FRAME){
3818 h->curr_pic_num= h->frame_num;
3819 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3821 h->curr_pic_num= 2*h->frame_num + 1;
3822 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3825 if(h->nal_unit_type == NAL_IDR_SLICE){
3826 get_ue_golomb(&s->gb); /* idr_pic_id */
3829 if(h->sps.poc_type==0){
3830 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3832 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3833 h->delta_poc_bottom= get_se_golomb(&s->gb);
3837 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3838 h->delta_poc[0]= get_se_golomb(&s->gb);
3840 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3841 h->delta_poc[1]= get_se_golomb(&s->gb);
3846 if(h->pps.redundant_pic_cnt_present){
3847 h->redundant_pic_count= get_ue_golomb(&s->gb);
3850 //set defaults, might be overridden a few lines later
3851 h->ref_count[0]= h->pps.ref_count[0];
3852 h->ref_count[1]= h->pps.ref_count[1];
3854 if(h->slice_type_nos != FF_I_TYPE){
3855 if(h->slice_type_nos == FF_B_TYPE){
3856 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3858 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3860 if(num_ref_idx_active_override_flag){
3861 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3862 if(h->slice_type_nos==FF_B_TYPE)
3863 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned trick: ref_count of 0 wraps and also trips this check. */
3865 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3866 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3867 h->ref_count[0]= h->ref_count[1]= 1;
3871 if(h->slice_type_nos == FF_B_TYPE)
3878 if(!default_ref_list_done){
3879 fill_default_ref_list(h);
3882 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3885 if(h->slice_type_nos!=FF_I_TYPE){
3886 s->last_picture_ptr= &h->ref_list[0][0];
3887 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3889 if(h->slice_type_nos==FF_B_TYPE){
3890 s->next_picture_ptr= &h->ref_list[1][0];
3891 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* Weighted prediction: explicit table for P (weighted_pred) and
 * B (bipred idc 1); implicit derivation for B with bipred idc 2. */
3894 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3895 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3896 pred_weight_table(h);
3897 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3898 implicit_weight_table(h);
3903 decode_ref_pic_marking(h0, &s->gb);
3906 fill_mbaff_ref_list(h);
3908 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3909 direct_dist_scale_factor(h);
3910 direct_ref_list_init(h);
3912 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3913 tmp = get_ue_golomb(&s->gb);
3915 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3918 h->cabac_init_idc= tmp;
3921 h->last_qscale_diff = 0;
3922 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3924 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3928 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3929 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3930 //FIXME qscale / qp ... stuff
3931 if(h->slice_type == FF_SP_TYPE){
3932 get_bits1(&s->gb); /* sp_for_switch_flag */
3934 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3935 get_se_golomb(&s->gb); /* slice_qs_delta */
3938 h->deblocking_filter = 1;
3939 h->slice_alpha_c0_offset = 0;
3940 h->slice_beta_offset = 0;
3941 if( h->pps.deblocking_filter_parameters_present ) {
3942 tmp= get_ue_golomb(&s->gb);
3944 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3947 h->deblocking_filter= tmp;
/* Bitstream uses 1 = disabled, 0 = enabled: invert for internal use. */
3948 if(h->deblocking_filter < 2)
3949 h->deblocking_filter^= 1; // 1<->0
3951 if( h->deblocking_filter ) {
3952 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3953 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3957 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3958 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3959 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3960 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3961 h->deblocking_filter= 0;
/* Cross-slice deblocking cannot be parallelized; either cheat (type 2)
 * or force single-context sequential decoding. */
3963 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3964 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3965 /* Cheat slightly for speed:
3966 Do not bother to deblock across slices. */
3967 h->deblocking_filter = 2;
3969 h0->max_contexts = 1;
3970 if(!h0->single_decode_warning) {
3971 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3972 h0->single_decode_warning = 1;
3975 return 1; // deblocking switched inside frame
3980 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3981 slice_group_change_cycle= get_bits(&s->gb, ?);
3984 h0->last_slice_type = slice_type;
3985 h->slice_num = ++h0->current_slice;
3986 if(h->slice_num >= MAX_SLICES){
3987 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Precompute ref-index -> frame-id mapping for this slice. */
3991 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3995 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3996 +(h->ref_list[j][i].reference&3);
3999 for(i=16; i<48; i++)
4000 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4001 +(h->ref_list[j][i].reference&3);
4004 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4005 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4007 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4008 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4010 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4012 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4013 pps_id, h->frame_num,
4014 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4015 h->ref_count[0], h->ref_count[1],
4017 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4019 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4020 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4030 static inline int get_level_prefix(GetBitContext *gb){
/* Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 bit, found via av_log2 on the 32-bit cache.
 * NOTE(review): extraction gaps — locals and the return are missing. */
4034 OPEN_READER(re, gb);
4035 UPDATE_CACHE(re, gb);
4036 buf=GET_CACHE(re, gb);
/* log = position of the leading 1; prefix length is log - 1 zeros + it. */
4038 log= 32 - av_log2(buf);
4040 print_bin(buf>>(32-log), log);
4041 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4044 LAST_SKIP_BITS(re, gb, log);
4045 CLOSE_READER(re, gb);
4050 static inline int get_dct8x8_allowed(H264Context *h){
/* An 8x8 transform is only allowed when no 8x8 sub-block is further
 * partitioned; tests all four packed sub_mb_types at once via the
 * 0x0001000100010001 replication mask. With direct_8x8_inference,
 * DIRECT2 sub-blocks do not forbid the 8x8 transform. */
4051 if(h->sps.direct_8x8_inference_flag)
4052 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4054 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4058 * decodes a residual block.
4059 * @param n block index
4060 * @param scantable scantable
4061 * @param max_coeff number of coefficients in the block
4062 * @return <0 if an error occurred
/* CAVLC residual parsing (H.264 clause 9.2): coeff_token -> trailing ones ->
 * remaining levels -> total_zeros -> run_before, then scatter the levels into
 * 'block' via 'scantable', optionally dequantizing with 'qmul'.
 * NOTE(review): many lines are elided in this extract (else branches, loop
 * bodies, returns); comments below describe only what is visible. */
4064 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4065 MpegEncContext * const s = &h->s;
4066 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4068 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4070 //FIXME put trailing_ones into the context
/* chroma DC uses its own, smaller coeff_token VLC */
4072 if(n == CHROMA_DC_BLOCK_INDEX){
4073 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4074 total_coeff= coeff_token>>2;
/* luma DC/AC: VLC table choice is predicted from neighboring nonzero counts */
4076 if(n == LUMA_DC_BLOCK_INDEX){
4077 total_coeff= pred_non_zero_count(h, 0);
4078 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4079 total_coeff= coeff_token>>2;
4081 total_coeff= pred_non_zero_count(h, n);
4082 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4083 total_coeff= coeff_token>>2;
4084 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4088 //FIXME set last_non_zero?
/* bitstream sanity check: more coefficients than the block can hold */
4092 if(total_coeff > (unsigned)max_coeff) {
4093 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low two bits of coeff_token encode the trailing +/-1 count (0..3) */
4097 trailing_ones= coeff_token&3;
4098 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4099 assert(total_coeff<=16);
/* decode up to 3 trailing-one signs from the next bits: bit set => -1 */
4101 i = show_bits(gb, 3);
4102 skip_bits(gb, trailing_ones);
4103 level[0] = 1-((i&4)>>1);
4104 level[1] = 1-((i&2) );
4105 level[2] = 1-((i&1)<<1);
/* remaining (non trailing-one) levels use prefix+suffix coding */
4107 if(trailing_ones<total_coeff) {
4108 int level_code, mask;
4109 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4110 int prefix= get_level_prefix(gb);
4112 //first coefficient has suffix_length equal to 0 or 1
4113 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4115 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4117 level_code= (prefix<<suffix_length); //part
4118 }else if(prefix==14){
4120 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4122 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: escape coding with a (prefix-3)-bit suffix */
4124 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4125 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4127 level_code += (1<<(prefix-3))-4096;
4130 if(trailing_ones < 3) level_code += 2;
/* map level_code to a signed level: even -> positive, odd -> negative */
4135 mask= -(level_code&1);
4136 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4138 //remaining coefficients have suffix_length > 0
4139 for(i=trailing_ones+1;i<total_coeff;i++) {
4140 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4141 prefix = get_level_prefix(gb);
4143 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4145 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4147 level_code += (1<<(prefix-3))-4096;
4149 mask= -(level_code&1);
4150 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adapt suffix_length upward once the magnitude exceeds the table limit */
4151 if(level_code > suffix_limit[suffix_length])
/* all coefficient slots used -> no zeros can remain between them */
4156 if(total_coeff == max_coeff)
4159 if(n == CHROMA_DC_BLOCK_INDEX)
4160 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4162 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter levels back-to-front through the scan order; first (qmul==NULL?)
 * branch stores raw levels, second dequantizes -- split is elided here */
4165 coeff_num = zeros_left + total_coeff - 1;
4166 j = scantable[coeff_num];
4168 block[j] = level[0];
4169 for(i=1;i<total_coeff;i++) {
4172 else if(zeros_left < 7){
4173 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4175 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4177 zeros_left -= run_before;
4178 coeff_num -= 1 + run_before;
4179 j= scantable[ coeff_num ];
/* dequantizing variant: (level * qmul + 32) >> 6 rounds to nearest */
4184 block[j] = (level[0] * qmul[j] + 32)>>6;
4185 for(i=1;i<total_coeff;i++) {
4188 else if(zeros_left < 7){
4189 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4191 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4193 zeros_left -= run_before;
4194 coeff_num -= 1 + run_before;
4195 j= scantable[ coeff_num ];
4197 block[j]= (level[i] * qmul[j] + 32)>>6;
/* negative zeros_left means the run_before codes overran: corrupt stream */
4202 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* For a skipped MBAFF pair the field_decoding_flag is not coded; predict it
 * from the left neighbor if it belongs to this slice, else from the top
 * neighbor, else (fallthrough elided here) a default. */
4209 static void predict_field_decoding_flag(H264Context *h){
4210 MpegEncContext * const s = &h->s;
4211 const int mb_xy= h->mb_xy;
4212 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4213 ? s->current_picture.mb_type[mb_xy-1]
4214 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4215 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* inherit field/frame coding from the chosen neighbor's mb_type */
4217 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4221 * decodes a P_SKIP or B_SKIP macroblock
/* Reconstructs motion info for a skipped MB: B_SKIP uses direct prediction,
 * P_SKIP uses the predicted P-skip MV with ref index 0. No residual exists,
 * so nonzero-count caches are cleared. */
4223 static void decode_mb_skip(H264Context *h){
4224 MpegEncContext * const s = &h->s;
4225 const int mb_xy= h->mb_xy;
/* skipped MB has no coefficients anywhere */
4228 memset(h->non_zero_count[mb_xy], 0, 16);
4229 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4232 mb_type|= MB_TYPE_INTERLACED;
4234 if( h->slice_type_nos == FF_B_TYPE )
4236 // just for fill_caches. pred_direct_motion will set the real mb_type
4237 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4239 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4240 pred_direct_motion(h, &mb_type);
4241 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path (else branch; brace lines elided in this extract) */
4246 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4248 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4249 pred_pskip_motion(h, &mx, &my);
4250 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4251 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state so neighbors/deblocking see consistent data */
4254 write_back_motion(h, mb_type);
4255 s->current_picture.mb_type[mb_xy]= mb_type;
4256 s->current_picture.qscale_table[mb_xy]= s->qscale;
4257 h->slice_table[ mb_xy ]= h->slice_num;
4258 h->prev_mb_skipped= 1;
4262 * decodes a macroblock
4263 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CAVLC macroblock parse: skip-run handling, mb_type, intra prediction
 * modes or inter partitions + refs + MVs, coded_block_pattern, dquant and
 * all residual blocks. NOTE(review): this extract elides many lines
 * (closing braces, else branches, some error returns); comments describe
 * only the visible logic. */
4265 static int decode_mb_cavlc(H264Context *h){
4266 MpegEncContext * const s = &h->s;
4268 int partition_count;
4269 unsigned int mb_type, cbp;
4270 int dct8x8_allowed= h->pps.transform_8x8_mode;
4272 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4274 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4275 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --- */
4277 if(h->slice_type_nos != FF_I_TYPE){
4278 if(s->mb_skip_run==-1)
4279 s->mb_skip_run= get_ue_golomb(&s->gb);
4281 if (s->mb_skip_run--) {
4282 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4283 if(s->mb_skip_run==0)
4284 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4286 predict_field_decoding_flag(h);
/* MBAFF: the field flag is coded once per MB pair, on the top MB */
4293 if( (s->mb_y&1) == 0 )
4294 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4297 h->prev_mb_skipped= 0;
/* --- mb_type: slice-type dependent remapping via lookup tables --- */
4299 mb_type= get_ue_golomb(&s->gb);
4300 if(h->slice_type_nos == FF_B_TYPE){
4302 partition_count= b_mb_type_info[mb_type].partition_count;
4303 mb_type= b_mb_type_info[mb_type].type;
/* large mb_type values in P/B slices fall through to intra decoding */
4306 goto decode_intra_mb;
4308 }else if(h->slice_type_nos == FF_P_TYPE){
4310 partition_count= p_mb_type_info[mb_type].partition_count;
4311 mb_type= p_mb_type_info[mb_type].type;
4314 goto decode_intra_mb;
4317 assert(h->slice_type_nos == FF_I_TYPE);
4318 if(h->slice_type == FF_SI_TYPE && mb_type)
4322 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4326 cbp= i_mb_type_info[mb_type].cbp;
4327 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4328 mb_type= i_mb_type_info[mb_type].type;
4332 mb_type |= MB_TYPE_INTERLACED;
4334 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples, bypasses prediction and residual entirely --- */
4336 if(IS_INTRA_PCM(mb_type)){
4339 // We assume these blocks are very rare so we do not optimize it.
4340 align_get_bits(&s->gb);
4342 // The pixels are stored in the same order as levels in h->mb array.
4343 for(x=0; x < (CHROMA ? 384 : 256); x++){
4344 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4347 // In deblocking, the quantizer is 0
4348 s->current_picture.qscale_table[mb_xy]= 0;
4349 // All coeffs are present
4350 memset(h->non_zero_count[mb_xy], 16, 16);
4352 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs address refs per field: double the counts temporarily
 * (undone near the end of the function) */
4357 h->ref_count[0] <<= 1;
4358 h->ref_count[1] <<= 1;
4361 fill_caches(h, mb_type, 0);
/* --- intra prediction mode parsing --- */
4364 if(IS_INTRA(mb_type)){
4366 // init_top_left_availability(h);
4367 if(IS_INTRA4x4(mb_type)){
4370 if(dct8x8_allowed && get_bits1(&s->gb)){
4371 mb_type |= MB_TYPE_8x8DCT;
4375 // fill_intra4x4_pred_table(h);
4376 for(i=0; i<16; i+=di){
4377 int mode= pred_intra_mode(h, i);
/* one flag per 4x4 block: use predicted mode, or 3-bit remainder */
4379 if(!get_bits1(&s->gb)){
4380 const int rem_mode= get_bits(&s->gb, 3);
4381 mode = rem_mode + (rem_mode >= mode);
4385 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4387 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4389 write_back_intra_pred_mode(h);
4390 if( check_intra4x4_pred_mode(h) < 0)
4393 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4394 if(h->intra16x16_pred_mode < 0)
4398 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4401 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions: sub_mb_type, refs and MVs per sub-block --- */
4403 }else if(partition_count==4){
4404 int i, j, sub_partition_count[4], list, ref[2][4];
4406 if(h->slice_type_nos == FF_B_TYPE){
4408 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4409 if(h->sub_mb_type[i] >=13){
4410 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4413 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4414 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4416 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4417 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4418 pred_direct_motion(h, &mb_type);
/* mark inner edges unavailable so MV prediction skips direct blocks */
4419 h->ref_cache[0][scan8[4]] =
4420 h->ref_cache[1][scan8[4]] =
4421 h->ref_cache[0][scan8[12]] =
4422 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4425 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4427 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4428 if(h->sub_mb_type[i] >=4){
4429 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4432 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4433 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices, one per used sub-block and list */
4437 for(list=0; list<h->list_count; list++){
4438 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4440 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4441 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4442 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4444 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4456 dct8x8_allowed = get_dct8x8_allowed(h);
4458 for(list=0; list<h->list_count; list++){
4460 if(IS_DIRECT(h->sub_mb_type[i])) {
4461 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4464 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4465 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4467 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4468 const int sub_mb_type= h->sub_mb_type[i];
4469 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4470 for(j=0; j<sub_partition_count[i]; j++){
4472 const int index= 4*i + block_width*j;
4473 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
/* MV = median predictor + coded delta */
4474 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4475 mx += get_se_golomb(&s->gb);
4476 my += get_se_golomb(&s->gb);
4477 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV across the cache cells its partition covers */
4479 if(IS_SUB_8X8(sub_mb_type)){
4481 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4483 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4484 }else if(IS_SUB_8X4(sub_mb_type)){
4485 mv_cache[ 1 ][0]= mx;
4486 mv_cache[ 1 ][1]= my;
4487 }else if(IS_SUB_4X8(sub_mb_type)){
4488 mv_cache[ 8 ][0]= mx;
4489 mv_cache[ 8 ][1]= my;
4491 mv_cache[ 0 ][0]= mx;
4492 mv_cache[ 0 ][1]= my;
4495 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4501 }else if(IS_DIRECT(mb_type)){
4502 pred_direct_motion(h, &mb_type);
4503 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
4505 int list, mx, my, i;
4506 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4507 if(IS_16X16(mb_type)){
4508 for(list=0; list<h->list_count; list++){
4510 if(IS_DIR(mb_type, 0, list)){
4511 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4512 if(val >= h->ref_count[list]){
4513 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4517 val= LIST_NOT_USED&0xFF;
4518 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4520 for(list=0; list<h->list_count; list++){
4522 if(IS_DIR(mb_type, 0, list)){
4523 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4524 mx += get_se_golomb(&s->gb);
4525 my += get_se_golomb(&s->gb);
4526 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4528 val= pack16to32(mx,my);
4531 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4534 else if(IS_16X8(mb_type)){
4535 for(list=0; list<h->list_count; list++){
4538 if(IS_DIR(mb_type, i, list)){
4539 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4540 if(val >= h->ref_count[list]){
4541 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4545 val= LIST_NOT_USED&0xFF;
4546 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4549 for(list=0; list<h->list_count; list++){
4552 if(IS_DIR(mb_type, i, list)){
4553 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4554 mx += get_se_golomb(&s->gb);
4555 my += get_se_golomb(&s->gb);
4556 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4558 val= pack16to32(mx,my);
4561 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4565 assert(IS_8X16(mb_type));
4566 for(list=0; list<h->list_count; list++){
4569 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4570 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4571 if(val >= h->ref_count[list]){
4572 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4576 val= LIST_NOT_USED&0xFF;
4577 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4580 for(list=0; list<h->list_count; list++){
4583 if(IS_DIR(mb_type, i, list)){
4584 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4585 mx += get_se_golomb(&s->gb);
4586 my += get_se_golomb(&s->gb);
4587 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4589 val= pack16to32(mx,my);
4592 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4598 if(IS_INTER(mb_type))
4599 write_back_motion(h, mb_type);
/* --- coded_block_pattern (not coded for I16x16: implied by mb_type) --- */
4601 if(!IS_INTRA16x16(mb_type)){
4602 cbp= get_ue_golomb(&s->gb);
4604 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4609 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4610 else cbp= golomb_to_inter_cbp [cbp];
/* gray (luma-only) variants for 4:0:0 content */
4612 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4613 else cbp= golomb_to_inter_cbp_gray[cbp];
/* inter 8x8-transform flag is only coded when some luma block is coded */
4618 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4619 if(get_bits1(&s->gb)){
4620 mb_type |= MB_TYPE_8x8DCT;
4621 h->cbp_table[mb_xy]= cbp;
4624 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
4626 if(cbp || IS_INTRA16x16(mb_type)){
4627 int i8x8, i4x4, chroma_idx;
4629 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4630 const uint8_t *scan, *scan8x8, *dc_scan;
4632 // fill_non_zero_count_cache(h);
/* choose field vs frame scan order; qscale==0 uses precomputed variants */
4634 if(IS_INTERLACED(mb_type)){
4635 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4636 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4637 dc_scan= luma_dc_field_scan;
4639 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4640 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4641 dc_scan= luma_dc_zigzag_scan;
4644 dquant= get_se_golomb(&s->gb);
4646 if( dquant > 25 || dquant < -26 ){
4647 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec */
4651 s->qscale += dquant;
4652 if(((unsigned)s->qscale) > 51){
4653 if(s->qscale<0) s->qscale+= 52;
4654 else s->qscale-= 52;
4657 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4658 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4659 if(IS_INTRA16x16(mb_type)){
4660 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4661 return -1; //FIXME continue if partitioned and other return -1 too
4664 assert((cbp&15) == 0 || (cbp&15) == 15);
/* I16x16 AC blocks: 15 coefficients, scan starts after the DC position */
4667 for(i8x8=0; i8x8<4; i8x8++){
4668 for(i4x4=0; i4x4<4; i4x4++){
4669 const int index= i4x4 + 4*i8x8;
4670 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4676 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-I16x16 luma: per 8x8 group, either one 8x8 transform or four 4x4 */
4679 for(i8x8=0; i8x8<4; i8x8++){
4680 if(cbp & (1<<i8x8)){
4681 if(IS_8x8DCT(mb_type)){
4682 DCTELEM *buf = &h->mb[64*i8x8];
4684 for(i4x4=0; i4x4<4; i4x4++){
4685 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4686 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4689 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4690 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4692 for(i4x4=0; i4x4<4; i4x4++){
4693 const int index= i4x4 + 4*i8x8;
4695 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4701 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4702 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (2x2) blocks; qmul==NULL: dequantized later */
4708 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4709 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4715 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4716 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4717 for(i4x4=0; i4x4<4; i4x4++){
4718 const int index= 16 + 4*chroma_idx + i4x4;
4719 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4725 uint8_t * const nnz= &h->non_zero_count_cache[0];
4726 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4727 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* nothing coded: clear the whole nonzero-count cache */
4730 uint8_t * const nnz= &h->non_zero_count_cache[0];
4731 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4732 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4733 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4735 s->current_picture.qscale_table[mb_xy]= s->qscale;
4736 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling done earlier */
4739 h->ref_count[0] >>= 1;
4740 h->ref_count[1] >>= 1;
/* Decodes the MBAFF mb_field_decoding_flag with CABAC; context 70+ctx where
 * ctx counts interlaced neighbors (left MB pair and the pair above). */
4746 static int decode_cabac_field_decoding_flag(H264Context *h) {
4747 MpegEncContext * const s = &h->s;
4748 const int mb_x = s->mb_x;
/* address the top MB of the current MB pair */
4749 const int mb_y = s->mb_y & ~1;
4750 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4751 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4753 unsigned int ctx = 0;
4755 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4758 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4762 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type via CABAC: 0 = I_4x4, 25 = I_PCM, 1..24 = I16x16
 * variants composed from cbp_luma/cbp_chroma/pred-mode bins. 'ctx_base' is 3
 * for I slices, 32 when called for intra MBs inside P/B slices. */
4765 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4766 uint8_t *state= &h->cabac_state[ctx_base];
4770 MpegEncContext * const s = &h->s;
4771 const int mba_xy = h->left_mb_xy[0];
4772 const int mbb_xy = h->top_mb_xy;
/* context increments for each non-I4x4 neighbor in the same slice */
4774 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4776 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4778 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4779 return 0; /* I4x4 */
4782 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4783 return 0; /* I4x4 */
/* terminate bin set => PCM macroblock */
4786 if( get_cabac_terminate( &h->cabac ) )
4787 return 25; /* PCM */
4789 mb_type = 1; /* I16x16 */
4790 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4791 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4792 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4793 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4794 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes mb_type for a B slice via the CABAC binarization tree rooted at
 * contexts 27..32: direct, single-list 16x16, then a 4/5-bit suffix that
 * selects bi-pred/partitioned types, intra, or B_8x8. */
4798 static int decode_cabac_mb_type_b( H264Context *h ) {
4799 MpegEncContext * const s = &h->s;
4801 const int mba_xy = h->left_mb_xy[0];
4802 const int mbb_xy = h->top_mb_xy;
4805 assert(h->slice_type_nos == FF_B_TYPE);
/* ctx = number of non-direct neighbors in this slice */
4807 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4809 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4812 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4813 return 0; /* B_Direct_16x16 */
4815 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4816 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4819 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4820 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4821 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4822 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4824 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4825 else if( bits == 13 ) {
/* intra MB inside a B slice: reuse the intra decoder with ctx_base 32 */
4826 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4827 } else if( bits == 14 )
4828 return 11; /* B_L1_L0_8x16 */
4829 else if( bits == 15 )
4830 return 22; /* B_8x8 */
/* bits 0..12: one more bin extends the code to 5 bits */
4832 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4833 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decodes mb_skip_flag with CABAC. Context 11+ctx (P) or 24+ctx (B, via the
 * slice-type offset below); ctx counts non-skipped neighbors. MBAFF needs
 * special neighbor addressing to pick the matching field/frame MB. */
4836 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4837 MpegEncContext * const s = &h->s;
4841 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4842 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4845 && h->slice_table[mba_xy] == h->slice_num
4846 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4847 mba_xy += s->mb_stride;
4849 mbb_xy = mb_xy - s->mb_stride;
4851 && h->slice_table[mbb_xy] == h->slice_num
4852 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4853 mbb_xy -= s->mb_stride;
4855 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
/* non-MBAFF: simple left/top addressing (FIELD_PICTURE doubles the stride) */
4857 int mb_xy = h->mb_xy;
4859 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4862 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4864 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4867 if( h->slice_type_nos == FF_B_TYPE )
4869 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes one intra4x4 prediction mode: a flag (ctx 68) selects the
 * predicted mode; otherwise 3 bins (ctx 69) give rem_intra4x4_pred_mode,
 * adjusted so the predicted mode is skipped in the numbering. */
4872 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4875 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* 3-bit remainder, LSB first */
4878 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4879 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4880 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4882 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode (0..3) as a truncated unary code on
 * contexts 64..67; ctx depends on whether neighbors used a nonzero mode. */
4888 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4889 const int mba_xy = h->left_mb_xy[0];
4890 const int mbb_xy = h->top_mb_xy;
4894 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4895 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4898 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4901 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4904 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4906 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decodes the 4-bit luma coded_block_pattern, one bin per 8x8 block;
 * each bin's context comes from the corresponding bits of the left (cbp_a)
 * and top (cbp_b) neighbors' cbp, -1 marking an unavailable neighbor. */
4912 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4913 int cbp_b, cbp_a, ctx, cbp = 0;
4915 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4916 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4918 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4919 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4920 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4921 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4922 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4923 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4924 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4925 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decodes chroma cbp (0 = none, 1 = DC only, 2 = DC+AC) using contexts
 * 77..80, conditioned on the neighbors' chroma cbp (bits 4-5 of their cbp). */
4928 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4932 cbp_a = (h->left_cbp>>4)&0x03;
4933 cbp_b = (h-> top_cbp>>4)&0x03;
4936 if( cbp_a > 0 ) ctx++;
4937 if( cbp_b > 0 ) ctx += 2;
4938 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: DC only vs DC+AC, with its own context pair */
4942 if( cbp_a == 2 ) ctx++;
4943 if( cbp_b == 2 ) ctx += 2;
4944 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta: unary code on contexts 60..63, then the count is
 * mapped to a signed delta (even -> negative, odd -> positive). */
4946 static int decode_cabac_mb_dqp( H264Context *h) {
/* first-bin context depends on whether the previous MB had a nonzero delta */
4947 int ctx= h->last_qscale_diff != 0;
4950 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4953 if(val > 102) //prevent infinite loop
4958 return (val + 1)>>1 ;
4960 return -((val + 1)>>1);
/* Decodes a P-slice sub_mb_type (0..3) from the 3-node tree on ctx 21..23;
 * return values for the inner branches are on lines elided from this extract. */
4962 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4963 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4965 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4967 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes a B-slice sub_mb_type (0..12) from the binarization tree on
 * contexts 36..39. */
4971 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4973 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4974 return 0; /* B_Direct_8x8 */
4975 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4976 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4978 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4979 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4980 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types: two more bins build the 2-bit suffix */
4983 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4984 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context 399..401 is selected by how many
 * neighbors used the 8x8 transform (h->neighbor_transform_size). */
4988 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4989 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx for block n of 'list' as a unary code on contexts 54..;
 * the initial context is derived from the left/top cached ref indices
 * (context derivation lines partially elided here). */
4992 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4993 int refa = h->ref_cache[list][scan8[n] - 1];
4994 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices direct-predicted neighbors do not contribute to the context */
4998 if( h->slice_type_nos == FF_B_TYPE) {
4999 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5001 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5010 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* clamp against corrupt streams; 32 exceeds any legal ref count */
5013 if(ref >= 32 /*h->ref_list[list]*/){
/* Decodes one motion vector difference component (l: 0=x, 1=y) using UEG3
 * binarization: up to 9 unary context-coded bins, then an exp-golomb bypass
 * suffix, then a bypass sign bit. */
5020 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* context from the magnitude of neighboring cached MVDs */
5021 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5022 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5023 int ctxbase = (l == 0) ? 40 : 47;
5025 int ctx = (amvd>2) + (amvd>32);
5027 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5032 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-golomb escape part, decoded in bypass mode */
5040 while( get_cabac_bypass( &h->cabac ) ) {
5044 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5049 if( get_cabac_bypass( &h->cabac ) )
5053 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Computes the coded_block_flag context for block category 'cat' and index
 * 'idx': ctx = nza + 2*nzb + 4*cat, where nza/nzb are the left/top neighbor
 * nonzero indicators (from cbp for DC, from the nonzero-count cache for AC). */
5056 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC flag is kept in bit 8 of the neighbors' stored cbp */
5062 nza = h->left_cbp&0x100;
5063 nzb = h-> top_cbp&0x100;
/* chroma DC flags live in bits 6+idx of the stored cbp */
5065 nza = (h->left_cbp>>(6+idx))&0x01;
5066 nzb = (h-> top_cbp>>(6+idx))&0x01;
5069 assert(cat == 1 || cat == 2 || cat == 4);
5070 nza = h->non_zero_count_cache[scan8[idx] - 1];
5071 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5080 return ctx + 4 * cat;
/* Context-offset table mapping an 8x8 scan position (0..62) to the context
 * used for last_significant_coeff_flag; byte-aligned for the asm decoder. */
5083 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5084 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5085 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5086 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5087 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoder (H.264 clause 9.3.2.3): coded_block_flag, then a
 * significance map, then coefficient levels (decoded back-to-front) with
 * optional dequantization via 'qmul'. 'is_dc' is a compile-time flag so the
 * compiler can specialize DC vs AC paths. NOTE(review): several lines
 * (declarations, loop tails, else branches) are elided in this extract. */
5090 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* per-category context base offsets, [0]=frame, [1]=field coding */
5091 static const int significant_coeff_flag_offset[2][6] = {
5092 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5093 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5095 static const int last_coeff_flag_offset[2][6] = {
5096 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5097 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5099 static const int coeff_abs_level_m1_offset[6] = {
5100 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position context offsets for the 8x8 significance map */
5102 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5103 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5104 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5105 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5106 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5107 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5108 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5109 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5110 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5112 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5113 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5114 * map node ctx => cabac ctx for level=1 */
5115 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5116 /* map node ctx => cabac ctx for level>1 */
5117 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5118 static const uint8_t coeff_abs_level_transition[2][8] = {
5119 /* update node ctx after decoding a level=1 */
5120 { 1, 2, 3, 3, 4, 5, 6, 7 },
5121 /* update node ctx after decoding a level>1 */
5122 { 4, 4, 4, 4, 5, 6, 7, 7 }
5128 int coeff_count = 0;
5131 uint8_t *significant_coeff_ctx_base;
5132 uint8_t *last_coeff_ctx_base;
5133 uint8_t *abs_level_m1_ctx_base;
/* copy the CABAC state to the stack so the hot loop works on locals */
5136 #define CABAC_ON_STACK
5138 #ifdef CABAC_ON_STACK
5141 cc.range = h->cabac.range;
5142 cc.low = h->cabac.low;
5143 cc.bytestream= h->cabac.bytestream;
5145 #define CC &h->cabac
5149 /* cat: 0-> DC 16x16 n = 0
5150 * 1-> AC 16x16 n = luma4x4idx
5151 * 2-> Luma4x4 n = luma4x4idx
5152 * 3-> DC Chroma n = iCbCr
5153 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5154 * 5-> Luma8x8 n = 4 * luma8x8idx
5157 /* read coded block flag */
5158 if( is_dc || cat != 5 ) {
5159 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients in this block: clear the cache entry and bail out */
5161 h->non_zero_count_cache[scan8[n]] = 0;
5163 #ifdef CABAC_ON_STACK
5164 h->cabac.range = cc.range ;
5165 h->cabac.low = cc.low ;
5166 h->cabac.bytestream= cc.bytestream;
5172 significant_coeff_ctx_base = h->cabac_state
5173 + significant_coeff_flag_offset[MB_FIELD][cat];
5174 last_coeff_ctx_base = h->cabac_state
5175 + last_coeff_flag_offset[MB_FIELD][cat];
5176 abs_level_m1_ctx_base = h->cabac_state
5177 + coeff_abs_level_m1_offset[cat];
5179 if( !is_dc && cat == 5 ) {
/* significance-map loop: record each significant position in index[],
 * stop early when the last_coeff flag fires */
5180 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5181 for(last= 0; last < coefs; last++) { \
5182 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5183 if( get_cabac( CC, sig_ctx )) { \
5184 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5185 index[coeff_count++] = last; \
5186 if( get_cabac( CC, last_ctx ) ) { \
5192 if( last == max_coeff -1 ) {\
5193 index[coeff_count++] = last;\
5195 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5196 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5197 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5199 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5201 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5203 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5206 assert(coeff_count > 0);
/* record "block has coefficients" in cbp_table / nonzero-count cache */
5210 h->cbp_table[h->mb_xy] |= 0x100;
5212 h->cbp_table[h->mb_xy] |= 0x40 << n;
5215 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5217 assert( cat == 1 || cat == 2 || cat == 4 );
5218 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* level decoding, highest scan position first */
5223 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5225 int j= scantable[index[--coeff_count]];
5227 if( get_cabac( CC, ctx ) == 0 ) {
5228 node_ctx = coeff_abs_level_transition[0][node_ctx];
/* |level| == 1: just the sign (bypass); optional dequant branch below */
5230 block[j] = get_cabac_bypass_sign( CC, -1);
5232 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5236 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5237 node_ctx = coeff_abs_level_transition[1][node_ctx];
5239 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* magnitude >= 15: exp-golomb escape, bypass coded */
5243 if( coeff_abs >= 15 ) {
5245 while( get_cabac_bypass( CC ) ) {
5251 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5257 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5259 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5262 } while( coeff_count );
/* write the stack-local CABAC state back to the context */
5263 #ifdef CABAC_ON_STACK
5264 h->cabac.range = cc.range ;
5265 h->cabac.low = cc.low ;
5266 h->cabac.bytestream= cc.bytestream;
5271 #ifndef CONFIG_SMALL
/* Thin wrappers around decode_cabac_residual_internal so the compiler can
 * specialize the is_dc flag (last argument) at compile time: one instance
 * for DC coefficient blocks, one for non-DC (AC/luma/chroma) blocks.
 * NOTE(review): closing braces and the matching #else/#endif are elided in
 * this excerpt — confirm against the full file. */
5272 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5273     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Same wrapper, but with is_dc == 0 for the AC coefficient path. */
5276 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5277     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Front-end for CABAC residual decoding: categories 0 (luma DC) and
 * 3 (chroma DC) take the DC path, everything else the non-DC path.
 * NOTE(review): lines 5283 and 5285/5286 look like the two sides of a
 * CONFIG_SMALL preprocessor branch (size-optimized single call vs. the
 * specialized dc/nondc wrappers); the #ifdef lines are elided here —
 * confirm against the full file. */
5281 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5283     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5285     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5286     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the macroblock addresses of the top and left neighbors of the
 * current MB (h->top_mb_xy, h->left_mb_xy[0]), with the MBAFF/field
 * corrections: in an MB pair the neighbor depends on whether the current
 * and neighboring pairs are field- or frame-coded. */
5290 static inline void compute_mb_neighbors(H264Context *h)
5292     MpegEncContext * const s = &h->s;
5293     const int mb_xy = h->mb_xy;
/* Default (non-MBAFF frame) neighbors: directly above and to the left. */
5294     h->top_mb_xy = mb_xy - s->mb_stride;
5295     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF case (enclosing condition elided in this excerpt): neighbors are
 * addressed in terms of the MB *pair* containing the current MB. */
5297         const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5298         const int top_pair_xy = pair_xy - s->mb_stride;
5299         const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5300         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5301         const int curr_mb_field_flag = MB_FIELD;
5302         const int bottom = (s->mb_y & 1);
/* Field MB: the top neighbor is one more row up when we are the bottom MB
 * of the pair or the pair above is also field-coded. */
5304         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5305             h->top_mb_xy -= s->mb_stride;
/* When left pair's field flag differs from ours, the left neighbor is the
 * top MB of the left pair rather than the MB directly to the left. */
5307         if (!left_mb_field_flag == curr_mb_field_flag) {
5308             h->left_mb_xy[0] = pair_xy - 1;
/* Field picture (PAFF): rows are interleaved, so the vertical neighbor is
 * two mb_stride rows away in picture coordinates. */
5310     } else if (FIELD_PICTURE) {
5311         h->top_mb_xy -= s->mb_stride;
5317  * decodes a macroblock
5318  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CABAC macroblock decode: skip flag, mb_type, intra prediction modes
 * or motion vectors / reference indices, CBP, qp delta and residuals.
 * NOTE(review): this excerpt omits many interior lines (else branches,
 * closing braces, some declarations) — comments below describe only what
 * is visible. */
5320 static int decode_mb_cabac(H264Context *h) {
5321     MpegEncContext * const s = &h->s;
5323     int mb_type, partition_count, cbp = 0;
/* 8x8 transform is only allowed if the PPS enables it; may be further
 * restricted below depending on the chosen partitioning. */
5324     int dct8x8_allowed= h->pps.transform_8x8_mode;
5326     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5328     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- Skip-flag handling (P/B slices only; I slices have no skip) --- */
5329     if( h->slice_type_nos != FF_I_TYPE ) {
5331         /* a skipped mb needs the aff flag from the following mb */
5332         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5333             predict_field_decoding_flag(h);
/* Bottom MB of a pair whose top MB was skipped: its skip flag was already
 * read ahead (next_mb_skipped) while decoding the top MB. */
5334         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5335             skip = h->next_mb_skipped;
5337             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5338         /* read skip flags */
5340             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5341                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Peek at the bottom MB's skip flag; if it is not skipped we must read the
 * field decoding flag now, before decoding the bottom MB. */
5342                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5343                 if(!h->next_mb_skipped)
5344                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB CABAC-related state and return early
 * (the early return itself is elided in this excerpt). */
5349             h->cbp_table[mb_xy] = 0;
5350             h->chroma_pred_mode_table[mb_xy] = 0;
5351             h->last_qscale_diff = 0;
5358         if( (s->mb_y&1) == 0 )
5360                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5363     h->prev_mb_skipped = 0;
5365     compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type --- */
5367     if( h->slice_type_nos == FF_B_TYPE ) {
5368         mb_type = decode_cabac_mb_type_b( h );
5370             partition_count= b_mb_type_info[mb_type].partition_count;
5371             mb_type= b_mb_type_info[mb_type].type;
/* B-slice intra MB types fall through to the shared intra path below. */
5374             goto decode_intra_mb;
5376     } else if( h->slice_type_nos == FF_P_TYPE ) {
5377         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5379             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5380                 /* P_L0_D16x16, P_8x8 */
5381                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5383                 /* P_L0_D8x16, P_L0_D16x8 */
5384                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5386             partition_count= p_mb_type_info[mb_type].partition_count;
5387             mb_type= p_mb_type_info[mb_type].type;
5389             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5390             goto decode_intra_mb;
/* I slice (or intra MB in P/B): decode the intra mb_type. */
5393         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5394         if(h->slice_type == FF_SI_TYPE && mb_type)
5396         assert(h->slice_type_nos == FF_I_TYPE);
5398         partition_count = 0;
5399         cbp= i_mb_type_info[mb_type].cbp;
5400         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5401         mb_type= i_mb_type_info[mb_type].type;
5404         mb_type |= MB_TYPE_INTERLACED;
5406     h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bitstream, bypassing CABAC --- */
5408     if(IS_INTRA_PCM(mb_type)) {
5411         // We assume these blocks are very rare so we do not optimize it.
5412         // FIXME The two following lines get the bitstream position in the cabac
5413         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5414         ptr= h->cabac.bytestream;
5415         if(h->cabac.low&0x1) ptr--;
5417         if(h->cabac.low&0x1FF) ptr--;
5420         // The pixels are stored in the same order as levels in h->mb array.
5421         memcpy(h->mb, ptr, 256); ptr+=256;
5423         memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Re-initialize the CABAC decoder right after the raw PCM payload. */
5426         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5428         // All blocks are present
5429         h->cbp_table[mb_xy] = 0x1ef;
5430         h->chroma_pred_mode_table[mb_xy] = 0;
5431         // In deblocking, the quantizer is 0
5432         s->current_picture.qscale_table[mb_xy]= 0;
5433         // All coeffs are present
5434         memset(h->non_zero_count[mb_xy], 16, 16);
5435         s->current_picture.mb_type[mb_xy]= mb_type;
5436         h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding so field references of both
 * parities are addressable; halved again at the end of this function. */
5441         h->ref_count[0] <<= 1;
5442         h->ref_count[1] <<= 1;
5445     fill_caches(h, mb_type, 0);
/* --- Intra MB: decode prediction modes --- */
5447     if( IS_INTRA( mb_type ) ) {
5449         if( IS_INTRA4x4( mb_type ) ) {
5450             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5451                 mb_type |= MB_TYPE_8x8DCT;
/* Intra8x8: one pred mode per 8x8 block, replicated into the 4x4 cache. */
5452                 for( i = 0; i < 16; i+=4 ) {
5453                     int pred = pred_intra_mode( h, i );
5454                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5455                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5458                 for( i = 0; i < 16; i++ ) {
5459                     int pred = pred_intra_mode( h, i );
5460                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5462                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5465             write_back_intra_pred_mode(h);
5466             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5468             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5469             if( h->intra16x16_pred_mode < 0 ) return -1;
5472         h->chroma_pred_mode_table[mb_xy] =
5473         pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5475         pred_mode= check_intra_pred_mode( h, pred_mode );
5476         if( pred_mode < 0 ) return -1;
5477         h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitions (P_8x8 / B_8x8): sub-mb types, refs, and MVs --- */
5479     } else if( partition_count == 4 ) {
5480         int i, j, sub_partition_count[4], list, ref[2][4];
5482         if( h->slice_type_nos == FF_B_TYPE ) {
5483             for( i = 0; i < 4; i++ ) {
5484                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5485                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5486                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Any direct 8x8 sub-block: derive its motion via spatial/temporal direct
 * prediction and mark the direct blocks unavailable for MV prediction. */
5488             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5489                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5490                 pred_direct_motion(h, &mb_type);
5491                 h->ref_cache[0][scan8[4]] =
5492                 h->ref_cache[1][scan8[4]] =
5493                 h->ref_cache[0][scan8[12]] =
5494                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5495                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5496                     for( i = 0; i < 4; i++ )
5497                         if( IS_DIRECT(h->sub_mb_type[i]) )
5498                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5502             for( i = 0; i < 4; i++ ) {
5503                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5504                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5505                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices first (per list, per 8x8 block), validated against
 * the active ref count; out-of-range indices are a bitstream error. */
5509         for( list = 0; list < h->list_count; list++ ) {
5510                 for( i = 0; i < 4; i++ ) {
5511                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5512                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5513                         if( h->ref_count[list] > 1 ){
5514                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5515                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5516                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5524                     h->ref_cache[list][ scan8[4*i]+1 ]=
5525                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5530             dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors: predict, add the decoded MVD, and store both MV and MVD
 * in the caches with the replication pattern of each sub-partition shape. */
5532         for(list=0; list<h->list_count; list++){
5534                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5535                 if(IS_DIRECT(h->sub_mb_type[i])){
5536                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5540                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5541                     const int sub_mb_type= h->sub_mb_type[i];
5542                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5543                     for(j=0; j<sub_partition_count[i]; j++){
5546                         const int index= 4*i + block_width*j;
5547                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5548                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5549                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5551                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5552                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5553                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5555                         if(IS_SUB_8X8(sub_mb_type)){
5557                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5559                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5562                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5564                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5565                         }else if(IS_SUB_8X4(sub_mb_type)){
5566                             mv_cache[ 1 ][0]= mx;
5567                             mv_cache[ 1 ][1]= my;
5569                             mvd_cache[ 1 ][0]= mx - mpx;
5570                             mvd_cache[ 1 ][1]= my - mpy;
5571                         }else if(IS_SUB_4X8(sub_mb_type)){
5572                             mv_cache[ 8 ][0]= mx;
5573                             mv_cache[ 8 ][1]= my;
5575                             mvd_cache[ 8 ][0]= mx - mpx;
5576                             mvd_cache[ 8 ][1]= my - mpy;
5578                         mv_cache[ 0 ][0]= mx;
5579                         mv_cache[ 0 ][1]= my;
5581                         mvd_cache[ 0 ][0]= mx - mpx;
5582                         mvd_cache[ 0 ][1]= my - mpy;
/* Unused direction of a uni-predicted sub-block: zero out the 2x2 MV/MVD
 * cache entries via 32-bit stores (two int16 components at a time). */
5585                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5586                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5587                     p[0] = p[1] = p[8] = p[9] = 0;
5588                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_Direct_16x16 --- */
5592     } else if( IS_DIRECT(mb_type) ) {
5593         pred_direct_motion(h, &mb_type);
5594         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5595         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5596         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5598         int list, mx, my, i, mpx, mpy;
5599         if(IS_16X16(mb_type)){
5600             for(list=0; list<h->list_count; list++){
5601                 if(IS_DIR(mb_type, 0, list)){
5603                     if(h->ref_count[list] > 1){
5604                         ref= decode_cabac_mb_ref(h, list, 0);
5605                         if(ref >= (unsigned)h->ref_count[list]){
5606                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5611                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5613                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5615             for(list=0; list<h->list_count; list++){
5616                 if(IS_DIR(mb_type, 0, list)){
5617                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5619                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5620                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5621                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5623                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5624                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5626                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5629         else if(IS_16X8(mb_type)){
5630             for(list=0; list<h->list_count; list++){
5632                     if(IS_DIR(mb_type, i, list)){
5634                         if(h->ref_count[list] > 1){
5635                             ref= decode_cabac_mb_ref( h, list, 8*i );
5636                             if(ref >= (unsigned)h->ref_count[list]){
5637                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5642                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5644                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5647             for(list=0; list<h->list_count; list++){
5649                     if(IS_DIR(mb_type, i, list)){
5650                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5651                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5652                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5653                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5655                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5656                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5658                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5659                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5664             assert(IS_8X16(mb_type));
5665             for(list=0; list<h->list_count; list++){
5667                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5669                         if(h->ref_count[list] > 1){
5670                             ref= decode_cabac_mb_ref( h, list, 4*i );
5671                             if(ref >= (unsigned)h->ref_count[list]){
5672                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5677                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5679                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5682             for(list=0; list<h->list_count; list++){
5684                     if(IS_DIR(mb_type, i, list)){
5685                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5686                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5687                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5689                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5690                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5691                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5693                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5694                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5701     if( IS_INTER( mb_type ) ) {
5702         h->chroma_pred_mode_table[mb_xy] = 0;
5703         write_back_motion( h, mb_type );
/* --- CBP (coded block pattern); I16x16 types carry cbp in mb_type --- */
5706     if( !IS_INTRA16x16( mb_type ) ) {
5707         cbp  = decode_cabac_mb_cbp_luma( h );
5709             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5712     h->cbp_table[mb_xy] = h->cbp = cbp;
5714     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5715         if( decode_cabac_mb_transform_size( h ) )
5716             mb_type |= MB_TYPE_8x8DCT;
5718     s->current_picture.mb_type[mb_xy]= mb_type;
/* --- Residual decoding: qp delta, then DC/AC coefficients --- */
5720     if( cbp || IS_INTRA16x16( mb_type ) ) {
5721         const uint8_t *scan, *scan8x8, *dc_scan;
5722         const uint32_t *qmul;
/* Field MBs use field scan orders; qscale==0 uses the _q0 tables. */
5725         if(IS_INTERLACED(mb_type)){
5726             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5727             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5728             dc_scan= luma_dc_field_scan;
5730             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5731             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5732             dc_scan= luma_dc_zigzag_scan;
5735         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5736         if( dqp == INT_MIN ){
5737             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale into [0,51] per the spec's modular qp update. */
5741         if(((unsigned)s->qscale) > 51){
5742             if(s->qscale<0) s->qscale+= 52;
5743             else            s->qscale-= 52;
5745         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5746         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5748         if( IS_INTRA16x16( mb_type ) ) {
5750             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
/* Luma DC 4x4 (cat 0), then 15-coefficient AC blocks (cat 1). */
5751             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5754                 qmul = h->dequant4_coeff[0][s->qscale];
5755                 for( i = 0; i < 16; i++ ) {
5756                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5757                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5760                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Non-I16x16 luma: per-8x8 (cat 5, 64 coeffs) or per-4x4 (cat 2). */
5764             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5765                 if( cbp & (1<<i8x8) ) {
5766                     if( IS_8x8DCT(mb_type) ) {
5767                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5768                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5770                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5771                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5772                             const int index = 4*i8x8 + i4x4;
5773                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5775                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5776 //STOP_TIMER("decode_residual")
5780                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5781                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3) and chroma AC (cat 4) for both components. */
5788             for( c = 0; c < 2; c++ ) {
5789                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5790                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5796             for( c = 0; c < 2; c++ ) {
5797                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5798                 for( i = 0; i < 4; i++ ) {
5799                     const int index = 16 + 4 * c + i;
5800                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5801                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5805             uint8_t * const nnz= &h->non_zero_count_cache[0];
5806             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5807             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear all non-zero counts and the qp delta. */
5810         uint8_t * const nnz= &h->non_zero_count_cache[0];
5811         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5812         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5813         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5814         h->last_qscale_diff = 0;
5817     s->current_picture.qscale_table[mb_xy]= s->qscale;
5818     write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling done before fill_caches(). */
5821         h->ref_count[0] >>= 1;
5822         h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge: bS < 4 uses the tc0-clipped normal
 * filter, bS == 4 (intra edge) the strong filter. The +52 table offsets
 * bias the lookup so negative qp+offset indices stay in range. */
5829 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5830     const int index_a = qp + h->slice_alpha_c0_offset;
5831     const int alpha = (alpha_table+52)[index_a];
5832     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
/* bS<4 branch (condition elided in this excerpt): per-edge tc0 thresholds. */
5836         tc[0] = (tc0_table+52)[index_a][bS[0]];
5837         tc[1] = (tc0_table+52)[index_a][bS[1]];
5838         tc[2] = (tc0_table+52)[index_a][bS[2]];
5839         tc[3] = (tc0_table+52)[index_a][bS[3]];
5840         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5842         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge; same structure as filter_mb_edgev but
 * the DSP chroma filter expects tc0+1 values. */
5845 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5846     const int index_a = qp + h->slice_alpha_c0_offset;
5847     const int alpha = (alpha_table+52)[index_a];
5848     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5852         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5853         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5854         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5855         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5856         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5858         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar per-row vertical luma deblocking for the MBAFF case, where the two
 * MBs sharing the edge may have different field/frame coding and thus
 * different qp per row (qp[2]). Cannot use the DSP batch filters because
 * bS and qp can change on every line. */
5862 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5864     for( i = 0; i < 16; i++, pix += stride) {
5870         int bS_index = (i >> 1);
5873             bS_index |= (i & 1);
5876         if( bS[bS_index] == 0 ) {
/* Row -> qp selection: field MBs split top/bottom half (i>>3), frame MBs
 * alternate per line parity (i&1). */
5880         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5881         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5882         alpha = (alpha_table+52)[index_a];
5883         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): clip the delta to +-tc. */
5885         if( bS[bS_index] < 4 ) {
5886             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5887             const int p0 = pix[-1];
5888             const int p1 = pix[-2];
5889             const int p2 = pix[-3];
5890             const int q0 = pix[0];
5891             const int q1 = pix[1];
5892             const int q2 = pix[2];
5894             if( FFABS( p0 - q0 ) < alpha &&
5895                 FFABS( p1 - p0 ) < beta &&
5896                 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 are adjusted only when the second neighbor is close enough
 * (|p2-p0| < beta resp. |q2-q0| < beta); tc accumulation lines elided. */
5900                 if( FFABS( p2 - p0 ) < beta ) {
5901                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5904                 if( FFABS( q2 - q0 ) < beta ) {
5905                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5909                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5910                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5911                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5912             tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4, intra edge). */
5915             const int p0 = pix[-1];
5916             const int p1 = pix[-2];
5917             const int p2 = pix[-3];
5919             const int q0 = pix[0];
5920             const int q1 = pix[1];
5921             const int q2 = pix[2];
5923             if( FFABS( p0 - q0 ) < alpha &&
5924                 FFABS( p1 - p0 ) < beta &&
5925                 FFABS( q1 - q0 ) < beta ) {
/* Extra-strong smoothing when the edge step is small relative to alpha. */
5927                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5928                     if( FFABS( p2 - p0 ) < beta)
5930                         const int p3 = pix[-4];
5932                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5933                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5934                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5937                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5939                     if( FFABS( q2 - q0 ) < beta)
5941                         const int q3 = pix[3];
5943                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5944                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5945                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5948                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak variant of the strong filter when the extra condition fails. */
5952                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5953                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5955             tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: 8 rows instead of 16,
 * chroma never adjusts p1/q1, and tc is tc0+1 as in the other chroma
 * filters. bS_index computation for chroma is elided in this excerpt. */
5960 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5962     for( i = 0; i < 8; i++, pix += stride) {
5970         if( bS[bS_index] == 0 ) {
/* Row -> qp selection: field MBs split halves at i>>2 (8 chroma rows),
 * frame MBs alternate per line parity. */
5974         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5975         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5976         alpha = (alpha_table+52)[index_a];
5977         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5979         if( bS[bS_index] < 4 ) {
5980             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5981             const int p0 = pix[-1];
5982             const int p1 = pix[-2];
5983             const int q0 = pix[0];
5984             const int q1 = pix[1];
5986             if( FFABS( p0 - q0 ) < alpha &&
5987                 FFABS( p1 - p0 ) < beta &&
5988                 FFABS( q1 - q0 ) < beta ) {
5989                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5991                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5992                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5993             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong chroma filter (bS == 4): simple 2-tap smoothing of p0/q0. */
5996             const int p0 = pix[-1];
5997             const int p1 = pix[-2];
5998             const int q0 = pix[0];
5999             const int q1 = pix[1];
6001             if( FFABS( p0 - q0 ) < alpha &&
6002                 FFABS( p1 - p0 ) < beta &&
6003                 FFABS( q1 - q0 ) < beta ) {
6005                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6006                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6007             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (vertical filter in DSP naming);
 * mirror of filter_mb_edgev. */
6013 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6014     const int index_a = qp + h->slice_alpha_c0_offset;
6015     const int alpha = (alpha_table+52)[index_a];
6016     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6020         tc[0] = (tc0_table+52)[index_a][bS[0]];
6021         tc[1] = (tc0_table+52)[index_a][bS[1]];
6022         tc[2] = (tc0_table+52)[index_a][bS[2]];
6023         tc[3] = (tc0_table+52)[index_a][bS[3]];
6024         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6026         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv
 * (chroma DSP filters take tc0+1). */
6030 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6031     const int index_a = qp + h->slice_alpha_c0_offset;
6032     const int alpha = (alpha_table+52)[index_a];
6033     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6037         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6038         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6039         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6040         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6041         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6043         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for a whole macroblock: computes boundary strengths
 * with shortcuts (DSP strength helper, all-4/all-3 intra cases) and falls
 * back to the generic filter_mb() in the cases it cannot handle. The tail
 * of this function (FILTER macro invocations and 8x8 handling) is elided
 * in this excerpt. */
6047 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6048     MpegEncContext * const s = &h->s;
6049     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6051     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Bail out to the generic path for picture edges, per-MB chroma qp diffs,
 * missing DSP helper, or cross-slice deblocking restrictions. */
6055     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6056        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6057        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6058                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6059         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6062     assert(!FRAME_MBAFF);
/* Average qp with each neighbor, as the spec requires for edge filtering. */
6064     mb_type = s->current_picture.mb_type[mb_xy];
6065     qp = s->current_picture.qscale_table[mb_xy];
6066     qp0 = s->current_picture.qscale_table[mb_xy-1];
6067     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6068     qpc = get_chroma_qp( h, 0, qp );
6069     qpc0 = get_chroma_qp( h, 0, qp0 );
6070     qpc1 = get_chroma_qp( h, 0, qp1 );
6071     qp0 = (qp + qp0 + 1) >> 1;
6072     qp1 = (qp + qp1 + 1) >> 1;
6073     qpc0 = (qpc + qpc0 + 1) >> 1;
6074     qpc1 = (qpc + qpc1 + 1) >> 1;
/* If every qp involved is below the alpha threshold, no edge can trigger
 * the filter at all — skip the MB entirely (return elided). */
6075     qp_thresh = 15 - h->slice_alpha_c0_offset;
6076     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6077        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: bS is constant (4 on MB edges, 3 inside; 3 on the horizontal
 * MB edge in field pictures), so filter all edges directly. */
6080     if( IS_INTRA(mb_type) ) {
6081         int16_t bS4[4] = {4,4,4,4};
6082         int16_t bS3[4] = {3,3,3,3};
6083         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every other internal edge exists. */
6084         if( IS_8x8DCT(mb_type) ) {
6085             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6086             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6087             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6088             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6090             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6091             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6092             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6093             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6094             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6095             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6096             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6097             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6099         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6100         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6101         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6102         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6103         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6104         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6105         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6106         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per 4-sample edge segment; 16-bit strengths are
 * accessed as packed 64-bit rows for whole-edge tests/stores. */
6109         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6110         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6112         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6114             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often the DSP strength helper may skip MV re-checks,
 * derived from the partition shapes of this MB and the left neighbor. */
6116             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6117                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6118             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6119                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6121             int step =  IS_8x8DCT(mb_type) ? 2 : 1;
6122             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6123             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6124                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbors force bS 4 (3 on horizontal edges in field pictures). */
6126         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6127             bSv[0][0] = 0x0004000400040004ULL;
6128         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6129             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6131 #define FILTER(hv,dir,edge)\
6132         if(bSv[dir][edge]) {\
6133             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6135                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6136                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6142         } else if( IS_8x8DCT(mb_type) ) {
6162 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6163 MpegEncContext * const s = &h->s;
6165 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6166 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6167 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6168 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6169 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6171 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6172 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6173 // how often to recheck mv-based bS when iterating between edges
6174 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6175 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6176 // how often to recheck mv-based bS when iterating along each edge
6177 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6179 if (first_vertical_edge_done) {
6183 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6186 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6187 && !IS_INTERLACED(mb_type)
6188 && IS_INTERLACED(mbm_type)
6190 // This is a special case in the norm where the filtering must
6191 // be done twice (one each of the field) even if we are in a
6192 // frame macroblock.
6194 static const int nnz_idx[4] = {4,5,6,3};
6195 unsigned int tmp_linesize = 2 * linesize;
6196 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6197 int mbn_xy = mb_xy - 2 * s->mb_stride;
6202 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6203 if( IS_INTRA(mb_type) ||
6204 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6205 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6207 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6208 for( i = 0; i < 4; i++ ) {
6209 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6210 mbn_nnz[nnz_idx[i]] != 0 )
6216 // Do not use s->qscale as luma quantizer because it has not the same
6217 // value in IPCM macroblocks.
6218 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6219 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6220 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6221 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6222 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6223 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6224 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6225 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6232 for( edge = start; edge < edges; edge++ ) {
6233 /* mbn_xy: neighbor macroblock */
6234 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6235 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6236 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6240 if( (edge&1) && IS_8x8DCT(mb_type) )
6243 if( IS_INTRA(mb_type) ||
6244 IS_INTRA(mbn_type) ) {
6247 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6248 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6257 bS[0] = bS[1] = bS[2] = bS[3] = value;
6262 if( edge & mask_edge ) {
6263 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6266 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6267 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6270 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6271 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6272 int bn_idx= b_idx - (dir ? 8:1);
6275 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6276 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6277 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6278 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6281 if(h->slice_type_nos == FF_B_TYPE && v){
6283 for( l = 0; !v && l < 2; l++ ) {
6285 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6286 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6287 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6291 bS[0] = bS[1] = bS[2] = bS[3] = v;
6297 for( i = 0; i < 4; i++ ) {
6298 int x = dir == 0 ? edge : i;
6299 int y = dir == 0 ? i : edge;
6300 int b_idx= 8 + 4 + x + 8*y;
6301 int bn_idx= b_idx - (dir ? 8:1);
6303 if( h->non_zero_count_cache[b_idx] |
6304 h->non_zero_count_cache[bn_idx] ) {
6310 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6311 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6312 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6313 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6319 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6321 for( l = 0; l < 2; l++ ) {
6323 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6324 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6325 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6334 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6339 // Do not use s->qscale as luma quantizer because it has not the same
6340 // value in IPCM macroblocks.
6341 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6342 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6343 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6344 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6346 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6347 if( (edge&1) == 0 ) {
6348 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6349 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6350 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6351 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6354 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6355 if( (edge&1) == 0 ) {
6356 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6357 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6358 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6359 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Apply the H.264 in-loop deblocking filter to one decoded macroblock.
// img_y/img_cb/img_cr point at the MB's top-left sample in each plane;
// linesize/uvlinesize are the plane strides.
// NOTE(review): this dump is lossy — original source lines are missing
// between the numbered lines below, so braces/conditions appear unbalanced.
6365 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6366 MpegEncContext * const s = &h->s;
6367 const int mb_xy= mb_x + mb_y*s->mb_stride;
6368 const int mb_type = s->current_picture.mb_type[mb_xy];
// Field MBs use a tighter vertical-MV threshold (2 instead of 4 quarter-pel units).
6369 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6370 int first_vertical_edge_done = 0;
6373 //for sufficiently low qp, filtering wouldn't do anything
6374 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6376 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6377 int qp = s->current_picture.qscale_table[mb_xy];
// Early-out: if the averaged QP with both the left and the top neighbour is
// below the threshold, the filter would be a no-op for this MB.
6379 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6380 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6385 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6386 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6387 int top_type, left_type[2];
6388 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6389 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6390 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// Rebuild the non_zero_count cache entries along the top edge from the
// neighbour's coded-block-pattern when that neighbour used the 8x8 transform.
6392 if(IS_8x8DCT(top_type)){
6393 h->non_zero_count_cache[4+8*0]=
6394 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6395 h->non_zero_count_cache[6+8*0]=
6396 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
// Same for the left edge (both halves of an MB pair).
6398 if(IS_8x8DCT(left_type[0])){
6399 h->non_zero_count_cache[3+8*1]=
6400 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6402 if(IS_8x8DCT(left_type[1])){
6403 h->non_zero_count_cache[3+8*3]=
6404 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// And for the current MB: spread each 8x8 CBP bit over its four 4x4 cache slots.
6407 if(IS_8x8DCT(mb_type)){
6408 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6409 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6411 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6412 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6414 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6415 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6417 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6418 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
// MBAFF special case: the first vertical edge must be filtered here when the
// current pair and the left pair have different frame/field coding.
6423 // left mb is in picture
6424 && h->slice_table[mb_xy-1] != 0xFFFF
6425 // and current and left pair do not have the same interlaced type
6426 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6427 // and left mb is in the same slice if deblocking_filter == 2
6428 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6429 /* First vertical edge is different in MBAFF frames
6430 * There are 8 different bS to compute and 2 different Qp
6432 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6433 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6438 int mb_qp, mbn0_qp, mbn1_qp;
6440 first_vertical_edge_done = 1;
// Intra MBs always get the strongest boundary strength (4) on this edge.
6442 if( IS_INTRA(mb_type) )
6443 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6445 for( i = 0; i < 8; i++ ) {
6446 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6448 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
// bS=2 when either side of the edge has coded residual coefficients.
6450 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6451 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6452 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6454 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Average luma/chroma QPs across the edge for the two neighbouring MBs.
6461 mb_qp = s->current_picture.qscale_table[mb_xy];
6462 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6463 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6464 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6465 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6466 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6467 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6468 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6469 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6470 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6471 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6472 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6473 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6476 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6477 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6478 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6479 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6480 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Remaining edges: vertical (dir=0) then horizontal (dir=1) via filter_mb_dir.
6484 for( dir = 0; dir < 2; dir++ )
6485 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6487 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6488 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
// Decode one slice: the per-thread worker invoked (directly or via
// avctx->execute) from execute_decode_slices. Selects the CABAC or CAVLC
// macroblock loop according to the active PPS, reports decoded regions to the
// error-resilience layer, and returns 0 on success / negative on error.
// NOTE(review): lossy dump — lines are missing between the numbered lines.
6492 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6493 H264Context *h = *(void**)arg;
6494 MpegEncContext * const s = &h->s;
// In partitioned (data-partitioning) mode only AC errors/ends are recorded here.
6495 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6499 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6500 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6502 if( h->pps.cabac ) {
// CABAC path: byte-align, then initialise the arithmetic decoder on the
// remaining slice payload.
6506 align_get_bits( &s->gb );
6509 ff_init_cabac_states( &h->cabac);
6510 ff_init_cabac_decoder( &h->cabac,
6511 s->gb.buffer + get_bits_count(&s->gb)/8,
6512 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6513 /* calculate pre-state */
// Initialise all 460 CABAC context states from the QP-dependent init tables.
6514 for( i= 0; i < 460; i++ ) {
6516 if( h->slice_type_nos == FF_I_TYPE )
6517 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6519 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
// Pack (state, MPS) into one byte: states 1..63 -> MPS 0, 64..126 -> MPS 1.
6522 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6524 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// Main CABAC macroblock loop.
6529 int ret = decode_mb_cabac(h);
6531 //STOP_TIMER("decode_mb_cabac")
6533 if(ret>=0) hl_decode_mb(h);
6535 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6538 ret = decode_mb_cabac(h);
6540 if(ret>=0) hl_decode_mb(h);
6543 eos = get_cabac_terminate( &h->cabac );
// Bail out on MB decode error or when the CABAC reader overran the buffer.
6545 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6546 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6547 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// Advance to the next MB row when the end of the current row is reached.
6551 if( ++s->mb_x >= s->mb_width ) {
6553 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6555 if(FIELD_OR_MBAFF_PICTURE) {
6560 if( eos || s->mb_y >= s->mb_height ) {
6561 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6562 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC path: same structure, bit-reader based.
6569 int ret = decode_mb_cavlc(h);
6571 if(ret>=0) hl_decode_mb(h);
6573 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6575 ret = decode_mb_cavlc(h);
6577 if(ret>=0) hl_decode_mb(h);
6582 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6583 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6588 if(++s->mb_x >= s->mb_width){
6590 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6592 if(FIELD_OR_MBAFF_PICTURE) {
6595 if(s->mb_y >= s->mb_height){
6596 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Clean slice end only if the bit reader stops exactly at the payload end.
6598 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6599 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6603 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// mb_skip_run > 0 means skipped MBs are still pending, so the slice isn't over.
6610 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6611 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6612 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6613 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6617 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6626 for(;s->mb_y < s->mb_height; s->mb_y++){
6627 for(;s->mb_x < s->mb_width; s->mb_x++){
6628 int ret= decode_mb(h);
6633 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6634 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6639 if(++s->mb_x >= s->mb_width){
6641 if(++s->mb_y >= s->mb_height){
6642 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6643 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6647 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// NOTE(review): the next line is garbled in this dump ("s->?gb" / "s->gb?.");
// upstream FFmpeg reads: if(get_bits_count(s->gb) >= s->gb.size_in_bits){
// — confirm against the original file before trusting this text.
6654 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6655 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6656 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6667 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6670 return -1; //not reached
// Parse a "picture timing" SEI message (H.264 spec D.1.2/D.2.2).
// Stores pic_struct for output timing; most other fields are skipped.
// Returns 0 (the visible paths do not report parse errors except the
// pic_struct range check). NOTE(review): lossy dump — some lines missing.
6673 static int decode_picture_timing(H264Context *h){
6674 MpegEncContext * const s = &h->s;
// HRD delays are only present when the SPS signalled HRD parameters;
// their bit lengths come from the SPS HRD section.
6675 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6676 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6677 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6679 if(h->sps.pic_struct_present_flag){
6680 unsigned int i, num_clock_ts;
6681 h->sei_pic_struct = get_bits(&s->gb, 4);
// Values above FRAME_TRIPLING are invalid per the spec.
6683 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6686 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
// Each pic_struct implies up to 3 clock timestamps; parse/skip each one.
6688 for (i = 0 ; i < num_clock_ts ; i++){
6689 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6690 unsigned int full_timestamp_flag;
6691 skip_bits(&s->gb, 2); /* ct_type */
6692 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6693 skip_bits(&s->gb, 5); /* counting_type */
6694 full_timestamp_flag = get_bits(&s->gb, 1);
6695 skip_bits(&s->gb, 1); /* discontinuity_flag */
6696 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6697 skip_bits(&s->gb, 8); /* n_frames */
6698 if(full_timestamp_flag){
// Full timestamp: seconds, minutes, hours all present unconditionally.
6699 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6700 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6701 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// Otherwise each component is gated by its own presence flag, nested.
6703 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6704 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6705 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6706 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6707 if(get_bits(&s->gb, 1)) /* hours_flag */
6708 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6712 if(h->sps.time_offset_length > 0)
6713 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
// Parse an "unregistered user data" SEI message of `size` bytes.
// Reads up to 16+255 bytes into a local buffer and scans for an x264
// version banner; when found, the encoder build number is recorded in
// h->x264_build (used elsewhere for bug workarounds). Excess payload
// bytes beyond the buffer are skipped. NOTE(review): lossy dump.
6720 static int decode_unregistered_user_data(H264Context *h, int size){
6721 MpegEncContext * const s = &h->s;
// First 16 bytes are the UUID; the printable payload starts at offset 16.
6722 uint8_t user_data[16+256];
6728 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6729 user_data[i]= get_bits(&s->gb, 8);
6733 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6734 if(e==1 && build>=0)
6735 h->x264_build= build;
6737 if(s->avctx->debug & FF_DEBUG_BUGS)
6738 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skip any remaining payload bytes that did not fit in user_data[].
6741 skip_bits(&s->gb, 8);
// Parse all SEI messages in the current NAL unit. Each message carries a
// type and a size, both coded as runs of 0xFF bytes plus a final byte
// (ff_byte accumulation per spec 7.3.2.3.1). Unknown types are skipped.
// NOTE(review): lossy dump — some lines missing between numbered lines.
6746 static int decode_sei(H264Context *h){
6747 MpegEncContext * const s = &h->s;
// Loop while at least one more (type, size) header can be present.
6749 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate the payload type: each 0xFF byte adds 255, the final
// non-0xFF byte terminates the run.
6754 type+= show_bits(&s->gb, 8);
6755 }while(get_bits(&s->gb, 8) == 255);
// Same encoding for the payload size (in bytes).
6759 size+= show_bits(&s->gb, 8);
6760 }while(get_bits(&s->gb, 8) == 255);
6763 case 1: // Picture timing SEI
6764 if(decode_picture_timing(h) < 0)
6768 if(decode_unregistered_user_data(h, size) < 0)
// Default: skip the payload of unrecognised SEI types.
6772 skip_bits(&s->gb, 8*size);
6775 //FIXME check bits here
6776 align_get_bits(&s->gb);
// Parse the HRD (hypothetical reference decoder) parameters of an SPS VUI
// (spec E.1.2). Most fields are read and discarded; only the delay field
// lengths and time_offset_length are stored (needed later by the picture
// timing SEI parser). Returns 0 on success, negative on invalid cpb_count.
6782 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6783 MpegEncContext * const s = &h->s;
6785 cpb_count = get_ue_golomb(&s->gb) + 1;
// cpb_cnt_minus1 must be in 0..31; reject out-of-range values.
6787 if(cpb_count > 32U){
6788 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6792 get_bits(&s->gb, 4); /* bit_rate_scale */
6793 get_bits(&s->gb, 4); /* cpb_size_scale */
// Per-CPB rate/size entries are parsed but not retained.
6794 for(i=0; i<cpb_count; i++){
6795 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6796 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6797 get_bits1(&s->gb); /* cbr_flag */
6799 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
// These lengths are consumed by decode_picture_timing().
6800 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6801 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6802 sps->time_offset_length = get_bits(&s->gb, 5);
// Parse the VUI (video usability information) section of an SPS (spec E.1.1):
// aspect ratio, colour description, timing info, HRD parameters and
// bitstream restrictions. Fields the decoder does not use are read and
// discarded. NOTE(review): lossy dump — some lines missing.
6806 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6807 MpegEncContext * const s = &h->s;
6808 int aspect_ratio_info_present_flag;
6809 unsigned int aspect_ratio_idc;
6811 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6813 if( aspect_ratio_info_present_flag ) {
6814 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR (255): explicit 16-bit numerator/denominator follow;
// otherwise the idc indexes the fixed pixel_aspect[] table.
6815 if( aspect_ratio_idc == EXTENDED_SAR ) {
6816 sps->sar.num= get_bits(&s->gb, 16);
6817 sps->sar.den= get_bits(&s->gb, 16);
6818 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6819 sps->sar= pixel_aspect[aspect_ratio_idc];
6821 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6828 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6830 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6831 get_bits1(&s->gb); /* overscan_appropriate_flag */
6834 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6835 get_bits(&s->gb, 3); /* video_format */
6836 get_bits1(&s->gb); /* video_full_range_flag */
6837 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6838 get_bits(&s->gb, 8); /* colour_primaries */
6839 get_bits(&s->gb, 8); /* transfer_characteristics */
6840 get_bits(&s->gb, 8); /* matrix_coefficients */
6844 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6845 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6846 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// Frame-rate information: time_scale / num_units_in_tick ticks per second.
6849 sps->timing_info_present_flag = get_bits1(&s->gb);
6850 if(sps->timing_info_present_flag){
6851 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6852 sps->time_scale = get_bits_long(&s->gb, 32);
6853 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameter sets share the same syntax.
6856 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6857 if(sps->nal_hrd_parameters_present_flag)
6858 if(decode_hrd_parameters(h, sps) < 0)
6860 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6861 if(sps->vcl_hrd_parameters_present_flag)
6862 if(decode_hrd_parameters(h, sps) < 0)
6864 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6865 get_bits1(&s->gb); /* low_delay_hrd_flag */
6866 sps->pic_struct_present_flag = get_bits1(&s->gb);
6868 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6869 if(sps->bitstream_restriction_flag){
6870 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6871 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6872 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6873 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6874 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// num_reorder_frames bounds the output-reorder buffer the decoder must keep.
6875 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6876 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6878 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6879 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
// Parse one quantisation scaling list of `size` (16 or 64) entries into
// `factors` (spec 7.3.2.1.1.1). If the list is absent in the bitstream the
// `fallback_list` is copied; if the first delta yields 0, the standard
// `jvt_list` default is used instead. Deltas are signed Golomb codes applied
// in zig-zag order. NOTE(review): lossy dump — some lines missing.
6887 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6888 const uint8_t *jvt_list, const uint8_t *fallback_list){
6889 MpegEncContext * const s = &h->s;
// `last` carries the previous coefficient; `next` the delta-decoded one.
6890 int i, last = 8, next = 8;
6891 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6892 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6893 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6895 for(i=0;i<size;i++){
6897 next = (last + get_se_golomb(&s->gb)) & 0xff;
6898 if(!i && !next){ /* matrix not written, we use the preset one */
6899 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next == 0 means "repeat the last value" for the remaining entries.
6902 last = factors[scan[i]] = next ? next : last;
// Parse the full set of scaling matrices for an SPS (is_sps != 0) or PPS.
// The fallback for each list is either the corresponding SPS matrix (when a
// PPS refines an SPS that carried matrices) or the spec default; within one
// parameter set, later chroma lists fall back to the previously decoded list
// (spec 7.4.2.1.1 fall-back rules A/B).
6906 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6907 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6908 MpegEncContext * const s = &h->s;
// PPS-level parsing may inherit from SPS matrices if the SPS had any.
6909 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6910 const uint8_t *fallback[4] = {
6911 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6912 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6913 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6914 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// seq_scaling_matrix_present_flag / pic_scaling_matrix_present_flag gate.
6916 if(get_bits1(&s->gb)){
6917 sps->scaling_matrix_present |= is_sps;
6918 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6919 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6920 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6921 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6922 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6923 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists only exist when the 8x8 transform can be used.
6924 if(is_sps || pps->transform_8x8_mode){
6925 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6926 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// Parse a sequence parameter set NAL (spec 7.3.2.1) into a freshly allocated
// SPS and store it in h->sps_buffers[sps_id], replacing any previous SPS with
// the same id. Returns 0 on success, negative on error.
// NOTE(review): lossy dump — error-return and closing-brace lines are missing
// between the numbered lines below.
6931 static inline int decode_seq_parameter_set(H264Context *h){
6932 MpegEncContext * const s = &h->s;
6933 int profile_idc, level_idc;
6934 unsigned int sps_id;
6938 profile_idc= get_bits(&s->gb, 8);
6939 get_bits1(&s->gb); //constraint_set0_flag
6940 get_bits1(&s->gb); //constraint_set1_flag
6941 get_bits1(&s->gb); //constraint_set2_flag
6942 get_bits1(&s->gb); //constraint_set3_flag
6943 get_bits(&s->gb, 4); // reserved
6944 level_idc= get_bits(&s->gb, 8);
6945 sps_id= get_ue_golomb(&s->gb);
6947 if(sps_id >= MAX_SPS_COUNT) {
6948 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6951 sps= av_mallocz(sizeof(SPS));
6955 sps->profile_idc= profile_idc;
6956 sps->level_idc= level_idc;
// Default: flat scaling matrices (all 16) until/unless the bitstream
// overrides them below.
6958 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6959 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6960 sps->scaling_matrix_present = 0;
// High-profile-only extension fields (chroma format, bit depth, 8x8 bypass,
// scaling matrices); baseline/main implicitly use 4:2:0.
6962 if(sps->profile_idc >= 100){ //high profile
6963 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6964 if(sps->chroma_format_idc == 3)
6965 get_bits1(&s->gb); //residual_color_transform_flag
6966 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6967 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6968 sps->transform_bypass = get_bits1(&s->gb);
6969 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
6971 sps->chroma_format_idc= 1;
6974 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6975 sps->poc_type= get_ue_golomb(&s->gb);
// Picture order count: type 0 uses lsb wrapping, type 1 uses explicit
// per-frame offsets, type 2 derives POC from frame_num.
6977 if(sps->poc_type == 0){ //FIXME #define
6978 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6979 } else if(sps->poc_type == 1){//FIXME #define
6980 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6981 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6982 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6983 sps->poc_cycle_length = get_ue_golomb(&s->gb);
6985 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6986 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
6990 for(i=0; i<sps->poc_cycle_length; i++)
6991 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6992 }else if(sps->poc_type != 2){
6993 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6997 sps->ref_frame_count= get_ue_golomb(&s->gb);
6998 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
6999 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7002 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7003 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7004 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Reject dimensions that would overflow 16*mb arithmetic or exceed the
// codec-wide limits.
7005 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7006 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7007 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7011 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7012 if(!sps->frame_mbs_only_flag)
7013 sps->mb_aff= get_bits1(&s->gb);
7017 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7019 #ifndef ALLOW_INTERLACE
7021 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7023 sps->crop= get_bits1(&s->gb);
7025 sps->crop_left = get_ue_golomb(&s->gb);
7026 sps->crop_right = get_ue_golomb(&s->gb);
7027 sps->crop_top = get_ue_golomb(&s->gb);
7028 sps->crop_bottom= get_ue_golomb(&s->gb);
// Only right/bottom cropping within one MB is fully supported here.
7029 if(sps->crop_left || sps->crop_top){
7030 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7032 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7033 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7039 sps->crop_bottom= 0;
7042 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7043 if( sps->vui_parameters_present_flag )
7044 decode_vui_parameters(h, sps);
7046 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7047 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7048 sps_id, sps->profile_idc, sps->level_idc,
7050 sps->ref_frame_count,
7051 sps->mb_width, sps->mb_height,
7052 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7053 sps->direct_8x8_inference_flag ? "8B8" : "",
7054 sps->crop_left, sps->crop_right,
7055 sps->crop_top, sps->crop_bottom,
7056 sps->vui_parameters_present_flag ? "VUI" : "",
7057 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previously stored SPS with the same id.
7060 av_free(h->sps_buffers[sps_id]);
7061 h->sps_buffers[sps_id]= sps;
// Precompute the luma-QP -> chroma-QP mapping for chroma plane `t` (0=Cb,
// 1=Cr) of a PPS: entry i is the spec chroma_qp[] table value at
// clip(i + chroma_qp_index_offset, 0, 51). NOTE(review): the function's
// return-type line is missing from this dump.
7069 build_qp_table(PPS *pps, int t, int index)
7072 for(i = 0; i < 52; i++)
7073 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
// Parse a picture parameter set NAL (spec 7.3.2.2) of `bit_length` bits into
// a freshly allocated PPS and store it in h->pps_buffers[pps_id]. FMO (slice
// groups > 1) is recognised but not supported. Returns 0 on success,
// negative on error. NOTE(review): lossy dump — error-return and brace lines
// are missing between the numbered lines below.
7076 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7077 MpegEncContext * const s = &h->s;
7078 unsigned int pps_id= get_ue_golomb(&s->gb);
7081 if(pps_id >= MAX_PPS_COUNT) {
7082 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7086 pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7089 pps->sps_id= get_ue_golomb(&s->gb);
7090 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7091 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7095 pps->cabac= get_bits1(&s->gb);
7096 pps->pic_order_present= get_bits1(&s->gb);
7097 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// FMO slice-group syntax (reproduced from the spec tables below) is parsed
// only far enough to report "not supported".
7098 if(pps->slice_group_count > 1 ){
7099 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7100 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7101 switch(pps->mb_slice_group_map_type){
7104 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7105 | run_length[ i ] |1 |ue(v) |
7110 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7112 | top_left_mb[ i ] |1 |ue(v) |
7113 | bottom_right_mb[ i ] |1 |ue(v) |
7121 | slice_group_change_direction_flag |1 |u(1) |
7122 | slice_group_change_rate_minus1 |1 |ue(v) |
7127 | slice_group_id_cnt_minus1 |1 |ue(v) |
7128 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7130 | slice_group_id[ i ] |1 |u(v) |
// Default active reference counts (num_ref_idx_l*_active_minus1 + 1).
7135 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7136 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7137 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7138 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7142 pps->weighted_pred= get_bits1(&s->gb);
7143 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7144 pps->init_qp= get_se_golomb(&s->gb) + 26;
7145 pps->init_qs= get_se_golomb(&s->gb) + 26;
7146 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7147 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7148 pps->constrained_intra_pred= get_bits1(&s->gb);
7149 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7151 pps->transform_8x8_mode= 0;
7152 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; the PPS extension below may refine them.
7153 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7154 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// Optional trailing extension fields (present only if bits remain).
7156 if(get_bits_count(&s->gb) < bit_length){
7157 pps->transform_8x8_mode= get_bits1(&s->gb);
7158 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7159 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7161 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7164 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7165 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7166 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7167 h->pps.chroma_qp_diff= 1;
7169 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7170 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7171 pps_id, pps->sps_id,
7172 pps->cabac ? "CABAC" : "CAVLC",
7173 pps->slice_group_count,
7174 pps->ref_count[0], pps->ref_count[1],
7175 pps->weighted_pred ? "weighted" : "",
7176 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7177 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7178 pps->constrained_intra_pred ? "CONSTR" : "",
7179 pps->redundant_pic_cnt_present ? "REDU" : "",
7180 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7184 av_free(h->pps_buffers[pps_id]);
7185 h->pps_buffers[pps_id]= pps;
7193 * Call decode_slice() for each context.
7195 * @param h h264 master context
7196 * @param context_count number of contexts to execute
7198 static void execute_decode_slices(H264Context *h, int context_count){
7199 MpegEncContext * const s = &h->s;
7200 AVCodecContext * const avctx= s->avctx;
// Single-context fast path: call decode_slice() directly, no threading.
7204 if(context_count == 1) {
7205 decode_slice(avctx, &h);
// Multi-context path: seed each worker context, then run them through the
// caller-provided avctx->execute() thread pool.
7207 for(i = 1; i < context_count; i++) {
7208 hx = h->thread_context[i];
7209 hx->s.error_recognition = avctx->error_recognition;
7210 hx->s.error_count = 0;
7213 avctx->execute(avctx, (void *)decode_slice,
7214 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7216 /* pull back stuff from slices to master context */
// The last context holds the final decode position/state for the master.
7217 hx = h->thread_context[context_count - 1];
7218 s->mb_x = hx->s.mb_x;
7219 s->mb_y = hx->s.mb_y;
7220 s->dropable = hx->s.dropable;
7221 s->picture_structure = hx->s.picture_structure;
// Accumulate per-thread error counts into the master context.
7222 for(i = 1; i < context_count; i++)
7223 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed) and Annex-B (start-code) framing,
 * distributes slice NALs across thread contexts, and triggers parallel
 * slice decoding once max_contexts slices have been collected.
 *
 * @param h        master decoder context
 * @param buf      input bitstream (one access unit, or SPS/PPS extradata)
 * @param buf_size size of buf in bytes
 * @return number of bytes consumed (per surrounding code), negative on error
 */
7228 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7229     MpegEncContext * const s = &h->s;
7230     AVCodecContext * const avctx= s->avctx;
7232     H264Context *hx; ///< thread context
7233     int context_count = 0;
7235     h->max_contexts = avctx->thread_count;
     /* Debug hexdump of the first input bytes (guard elided in this excerpt). */
7238         for(i=0; i<50; i++){
7239             av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Without CODEC_FLAG2_CHUNKS each call starts a fresh access unit. */
7242     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7243         h->current_slice = 0;
7244         if (!s->first_field)
7245             s->current_picture_ptr= NULL;
     /* --- AVC framing: read nal_length_size big-endian length prefix. --- */
7257             if(buf_index >= buf_size) break;
7259             for(i = 0; i < h->nal_length_size; i++)
7260                 nalsize = (nalsize << 8) | buf[buf_index++];
7261             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7266                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
     /* --- Annex-B framing: scan for the 00 00 01 start-code prefix. --- */
7271             // start code prefix search
7272             for(; buf_index + 3 < buf_size; buf_index++){
7273                 // This should always succeed in the first iteration.
7274                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7278             if(buf_index+3 >= buf_size) break;
     /* Unescape the NAL (remove emulation-prevention bytes) into hx's rbsp buffer. */
7283         hx = h->thread_context[context_count];
7285         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7286         if (ptr==NULL || dst_length < 0){
     /* Strip trailing zero bytes before locating rbsp_stop_one_bit.
      * NOTE(review): operands are in the wrong order — ptr[dst_length - 1] is
      * evaluated before the dst_length > 0 guard, so dst_length == 0 reads
      * ptr[-1]. Should be: while(dst_length > 0 && ptr[dst_length - 1] == 0). */
7289         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7291         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7293         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7294             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
     /* In AVC mode the unescaped NAL must consume exactly the prefixed length. */
7297         if (h->is_avc && (nalsize != consumed)){
7298             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7302         buf_index += consumed;
     /* Skip non-reference NALs when hurrying up / skipping non-ref frames. */
7304         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7305            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
7310         switch(hx->nal_unit_type){
     /* IDR slice: forbid mixing with non-IDR slices, then flush references. */
7312             if (h->nal_unit_type != NAL_IDR_SLICE) {
7313                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7316             idr(h); //FIXME ensure we don't loose some frames if there is reordering
     /* Regular (non-partitioned) slice: single bitstream for intra+inter. */
7318             init_get_bits(&hx->s.gb, ptr, bit_length);
7320             hx->inter_gb_ptr= &hx->s.gb;
7321             hx->s.data_partitioning = 0;
7323             if((err = decode_slice_header(hx, h)))
7326             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
     /* Only decode the slice if it passes the skip/hurry-up filters. */
7327             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7328                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7329                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7330                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7331                && avctx->skip_frame < AVDISCARD_ALL)
     /* Data partition A: slice header + partition-A data. */
7335             init_get_bits(&hx->s.gb, ptr, bit_length);
7337             hx->inter_gb_ptr= NULL;
7338             hx->s.data_partitioning = 1;
7340             err = decode_slice_header(hx, h);
     /* Data partition B: intra residual bitstream. */
7343             init_get_bits(&hx->intra_gb, ptr, bit_length);
7344             hx->intra_gb_ptr= &hx->intra_gb;
     /* Data partition C: inter residual bitstream; decode once A+B+C present. */
7347             init_get_bits(&hx->inter_gb, ptr, bit_length);
7348             hx->inter_gb_ptr= &hx->inter_gb;
7350             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7351                && s->context_initialized
7353                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7354                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7355                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7356                && avctx->skip_frame < AVDISCARD_ALL)
     /* SEI NAL (case label elided in this excerpt). */
7360             init_get_bits(&s->gb, ptr, bit_length);
     /* Sequence parameter set. */
7364             init_get_bits(&s->gb, ptr, bit_length);
7365             decode_seq_parameter_set(h);
7367             if(s->flags& CODEC_FLAG_LOW_DELAY)
7370             if(avctx->has_b_frames < 2)
7371                 avctx->has_b_frames= !s->low_delay;
     /* Picture parameter set. */
7374             init_get_bits(&s->gb, ptr, bit_length);
7376             decode_picture_parameter_set(h, bit_length);
     /* NAL types that are recognized but intentionally ignored. */
7380         case NAL_END_SEQUENCE:
7381         case NAL_END_STREAM:
7382         case NAL_FILLER_DATA:
7384         case NAL_AUXILIARY_SLICE:
7387             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
     /* Flush collected slices once all thread contexts are occupied. */
7390         if(context_count == h->max_contexts) {
7391             execute_decode_slices(h, context_count);
7396             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7398             /* Slice could not be decoded in parallel mode, copy down
7399              * NAL unit stuff to context 0 and restart. Note that
7400              * rbsp_buffer is not transferred, but since we no longer
7401              * run in parallel mode this should not be an issue. */
7402             h->nal_unit_type = hx->nal_unit_type;
7403             h->nal_ref_idc   = hx->nal_ref_idc;
     /* Decode any remaining slices that did not fill a full batch. */
7409         execute_decode_slices(h, context_count);
7414  * returns the number of bytes consumed for building the current frame
/* Clamp the reported consumed-byte count: never 0 (callers could loop
 * forever re-feeding the same data), and treat "almost everything" as
 * the whole buffer. Return statement elided in this excerpt. */
7416 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7417     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7418     if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode callback: parse one packet into NAL units, decode it,
 * and emit at most one reordered picture through *pict / *data_size.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame*
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @param buf       input packet (NULL/0 size signals end of stream)
 * @param buf_size  size of buf in bytes
 * @return consumed byte count via get_consumed_bytes()
 */
7423 static int decode_frame(AVCodecContext *avctx,
7424                         void *data, int *data_size,
7425                         const uint8_t *buf, int buf_size)
7427     H264Context *h = avctx->priv_data;
7428     MpegEncContext *s = &h->s;
7429     AVFrame *pict = data;
7432     s->flags= avctx->flags;
7433     s->flags2= avctx->flags2;
7435    /* end of stream, output what is still in the buffers */
7436     if (buf_size == 0) {
7440         //FIXME factorize this with the output code below
     /* Pick the lowest-POC delayed picture (stop at a POC-0/keyframe boundary). */
7441         out = h->delayed_pic[0];
7443         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7444             if(h->delayed_pic[i]->poc < out->poc){
7445                 out = h->delayed_pic[i];
     /* Compact the delayed-picture list over the emitted slot. */
7449             for(i=out_idx; h->delayed_pic[i]; i++)
7450                 h->delayed_pic[i] = h->delayed_pic[i+1];
7453             *data_size = sizeof(AVFrame);
7454             *pict= *(AVFrame*)out;
     /* One-time parse of the avcC extradata box (AVC/MP4 framing). */
7460     if(h->is_avc && !h->got_avcC) {
7461         int i, cnt, nalsize;
7462         unsigned char *p = avctx->extradata;
     /* avcC needs at least 7 bytes: version..nal_length + sps count. */
7463         if(avctx->extradata_size < 7) {
7464             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7468             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7471         /* sps and pps in the avcC always have length coded with 2 bytes,
7472            so put a fake nal_length_size = 2 while parsing them */
7473         h->nal_length_size = 2;
7474         // Decode sps from avcC
7475         cnt = *(p+5) & 0x1f; // Number of sps
7477         for (i = 0; i < cnt; i++) {
7478             nalsize = AV_RB16(p) + 2;
7479             if(decode_nal_units(h, p, nalsize) < 0) {
7480                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7485         // Decode pps from avcC
7486         cnt = *(p++); // Number of pps
7487         for (i = 0; i < cnt; i++) {
7488             nalsize = AV_RB16(p) + 2;
     /* NOTE(review): sps loop checks `< 0` while pps loop checks `!= nalsize`
      * — inconsistent success criteria for the same call; confirm intent. */
7489             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7490                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7495         // Now store right nal length size, that will be use to parse all other nals
7496         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7497         // Do not reparse avcC
     /* Annex-B extradata (e.g. from a demuxer) is decoded once up front. */
7501     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7502         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7507     buf_index=decode_nal_units(h, buf, buf_size);
     /* No picture was started: OK when skipping, otherwise an error. */
7511     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7512         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7513         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* Picture completed (whole-packet mode, or chunks mode reached mb_height). */
7517     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7518         Picture *out = s->current_picture_ptr;
7519         Picture *cur = s->current_picture_ptr;
7520         int i, pics, cross_idr, out_of_order, out_idx;
7524         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7525         s->current_picture_ptr->pict_type= s->pict_type;
     /* Apply memory-management control operations and roll POC state forward. */
7528             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7529         h->prev_poc_msb= h->poc_msb;
7530         h->prev_poc_lsb= h->poc_lsb;
7532         h->prev_frame_num_offset= h->frame_num_offset;
7533         h->prev_frame_num= h->frame_num;
7536          * FIXME: Error handling code does not seem to support interlaced
7537          * when slices span multiple rows
7538          * The ff_er_add_slice calls don't work right for bottom
7539          * fields; they cause massive erroneous error concealing
7540          * Error marking covers both fields (top and bottom).
7541          * This causes a mismatched s->error_count
7542          * and a bad error table. Further, the error count goes to
7543          * INT_MAX when called for bottom field, because mb_y is
7544          * past end by one (callers fault) and resync_mb_y != 0
7545          * causes problems for the first MB line, too.
     /* A field POC still at INT_MAX means only one field decoded so far. */
7552         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7553             /* Wait for second field. */
7557         cur->repeat_pict = 0;
7559         /* Signal interlacing information externally. */
7560         /* Prioritize picture timing SEI information over used decoding process if it exists. */
7561         if(h->sps.pic_struct_present_flag){
7562             switch (h->sei_pic_struct)
7564             case SEI_PIC_STRUCT_FRAME:
7565                 cur->interlaced_frame = 0;
7567             case SEI_PIC_STRUCT_TOP_FIELD:
7568             case SEI_PIC_STRUCT_BOTTOM_FIELD:
7569             case SEI_PIC_STRUCT_TOP_BOTTOM:
7570             case SEI_PIC_STRUCT_BOTTOM_TOP:
7571                 cur->interlaced_frame = 1;
7573             case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7574             case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7575                 // Signal the possibility of telecined film externally (pic_struct 5,6)
7576                 // From these hints, let the applications decide if they apply deinterlacing.
7577                 cur->repeat_pict = 1;
7578                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7580             case SEI_PIC_STRUCT_FRAME_DOUBLING:
7581                 // Force progressive here, as doubling interlaced frame is a bad idea.
7582                 cur->interlaced_frame = 0;
7583                 cur->repeat_pict = 2;
7585             case SEI_PIC_STRUCT_FRAME_TRIPLING:
7586                 cur->interlaced_frame = 0;
7587                 cur->repeat_pict = 4;
7591             /* Derive interlacing flag from used decoding process. */
7592             cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7595         if (cur->field_poc[0] != cur->field_poc[1]){
7596             /* Derive top_field_first from field pocs. */
7597             cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7599             if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7600                 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7601                 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7602                   || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7603                     cur->top_field_first = 1;
7605                     cur->top_field_first = 0;
7607                 /* Most likely progressive */
7608                 cur->top_field_first = 0;
7612         //FIXME do something with unavailable reference frames
7614         /* Sort B-frames into display order */
     /* Grow the output delay to at least num_reorder_frames when signalled. */
7616         if(h->sps.bitstream_restriction_flag
7617            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7618             s->avctx->has_b_frames = h->sps.num_reorder_frames;
     /* Strict compliance without restriction info: assume worst-case delay. */
7622         if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7623            && !h->sps.bitstream_restriction_flag){
7624             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
     /* Append cur to the delayed list and keep it referenced until output. */
7629         while(h->delayed_pic[pics]) pics++;
7631         assert(pics <= MAX_DELAYED_PIC_COUNT);
7633         h->delayed_pic[pics++] = cur;
7634         if(cur->reference == 0)
7635             cur->reference = DELAYED_PIC_REF;
     /* Select the lowest-POC candidate (same scan as the flush path above). */
7637         out = h->delayed_pic[0];
7639         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7640             if(h->delayed_pic[i]->poc < out->poc){
7641                 out = h->delayed_pic[i];
     /* cross_idr: the delayed list spans an IDR/keyframe boundary, so POC
      * comparison against outputed_poc is not meaningful. */
7644         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7646         out_of_order = !cross_idr && out->poc < h->outputed_poc;
     /* Adaptively bump has_b_frames when reordering evidence shows the
      * current delay is too small (bounded by MAX_DELAYED_PIC_COUNT). */
7648         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7650         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7652                  ((!cross_idr && out->poc > h->outputed_poc + 2)
7653                   || cur->pict_type == FF_B_TYPE)))
7656             s->avctx->has_b_frames++;
     /* Emit when forced (out_of_order) or the delay buffer is over-full. */
7659         if(out_of_order || pics > s->avctx->has_b_frames){
7660             out->reference &= ~DELAYED_PIC_REF;
7661             for(i=out_idx; h->delayed_pic[i]; i++)
7662                 h->delayed_pic[i] = h->delayed_pic[i+1];
7664         if(!out_of_order && pics > s->avctx->has_b_frames){
7665             *data_size = sizeof(AVFrame);
7667                 h->outputed_poc = out->poc;
7668             *pict= *(AVFrame*)out;
7670             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7675     assert(pict->data[0] || !*data_size);
7676     ff_print_debug_info(s, pict);
7677 //printf("out %d\n", (int)pict->data[0]);
7680 /* Return the Picture timestamp as the frame number */
7681 /* we subtract 1 because it is added on utils.c */
7682     avctx->frame_number = s->picture_number - 1;
7684     return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: [0]=top-left, [1]=top, [2]=top-right, [3]=left, [4]/[5] fixed.
 * A neighbor is available only if it lies in the same slice (slice_table
 * match). NOTE(review): lines elided here presumably guard the top-row
 * accesses with an s->mb_y check — confirm before relying on [0..2]. */
7687 static inline void fill_mb_avail(H264Context *h){
7688     MpegEncContext * const s = &h->s;
7689     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7692         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7693         h->mb_avail[1]=                             h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7694         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7700     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7701     h->mb_avail[4]= 1; //FIXME move out
7702     h->mb_avail[5]= 0; //FIXME move out
/* Standalone self-test harness (enclosing main() and #ifdef TEST guard are
 * elided in this excerpt). Exercises, in order: Exp-Golomb codecs, the 4x4
 * (I)DCT round-trip, the quantizer, NAL escaping/unescaping, and RBSP. */
7710 #define SIZE (COUNT*40)
7716 //    int int_temp[10000];
7718     AVCodecContext avctx;
7720     dsputil_init(&dsp, &avctx);
 /* --- Phase 1: unsigned Exp-Golomb write/read round-trip for 0..COUNT-1. --- */
7722     init_put_bits(&pb, temp, SIZE);
7723     printf("testing unsigned exp golomb\n");
7724     for(i=0; i<COUNT; i++){
7726         set_ue_golomb(&pb, i);
7727         STOP_TIMER("set_ue_golomb");
7729     flush_put_bits(&pb);
7731     init_get_bits(&gb, temp, 8*SIZE);
7732     for(i=0; i<COUNT; i++){
7735         s= show_bits(&gb, 24);
7738         j= get_ue_golomb(&gb);
7740             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7743         STOP_TIMER("get_ue_golomb");
 /* --- Phase 2: signed Exp-Golomb round-trip for -COUNT/2..COUNT/2-1. --- */
7747     init_put_bits(&pb, temp, SIZE);
7748     printf("testing signed exp golomb\n");
7749     for(i=0; i<COUNT; i++){
7751         set_se_golomb(&pb, i - COUNT/2);
7752         STOP_TIMER("set_se_golomb");
7754     flush_put_bits(&pb);
7756     init_get_bits(&gb, temp, 8*SIZE);
7757     for(i=0; i<COUNT; i++){
7760         s= show_bits(&gb, 24);
7763         j= get_se_golomb(&gb);
7764         if(j != i - COUNT/2){
7765             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7768         STOP_TIMER("get_se_golomb");
 /* --- Phase 3: 4x4 DCT -> rescale -> IDCT round-trip on random blocks,
  *     accumulating absolute error against the source pixels. --- */
7772     printf("testing 4x4 (I)DCT\n");
7775         uint8_t src[16], ref[16];
7776         uint64_t error= 0, max_error=0;
7778         for(i=0; i<COUNT; i++){
7780 //            printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7781             for(j=0; j<16; j++){
7782                 ref[j]= random()%255;
7783                 src[j]= random()%255;
7786             h264_diff_dct_c(block, src, ref, 4);
 /* Scale coefficients to compensate the DCT/IDCT norm differences. */
7789             for(j=0; j<16; j++){
7790 //                printf("%d ", block[j]);
7791                 block[j]= block[j]*4;
7792                 if(j&1) block[j]= (block[j]*4 + 2)/5;
7793                 if(j&4) block[j]= (block[j]*4 + 2)/5;
7797             s->dsp.h264_idct_add(ref, block, 4);
7798 /*            for(j=0; j<16; j++){
7799                 printf("%d ", ref[j]);
7803             for(j=0; j<16; j++){
7804                 int diff= FFABS(src[j] - ref[j]);
7807                 max_error= FFMAX(max_error, diff);
7810         printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
 /* --- Phase 4: quantizer sweep over all 52 QP values on random data. --- */
7811         printf("testing quantizer\n");
7812         for(qp=0; qp<52; qp++){
7814                 src1_block[i]= src2_block[i]= random()%255;
 /* --- Phase 5: NAL escaping round-trip — encode a random bitstream
  *     (with forced zero runs) and verify decode reproduces it exactly. --- */
7817         printf("Testing NAL layer\n");
7819         uint8_t bitstream[COUNT];
7820         uint8_t nal[COUNT*2];
7822         memset(&h, 0, sizeof(H264Context));
7824         for(i=0; i<COUNT; i++){
 /* Random nonzero payload ... */
7832             for(j=0; j<COUNT; j++){
7833                 bitstream[j]= (random() % 255) + 1;
 /* ... with `zeros` randomly-placed zero bytes to trigger escaping. */
7836             for(j=0; j<zeros; j++){
7837                 int pos= random() % COUNT;
7838                 while(bitstream[pos] == 0){
7847             nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7849                 printf("encoding failed\n");
7853             out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7857             if(out_length != COUNT){
7858                 printf("incorrect length %d %d\n", out_length, COUNT);
7862             if(consumed != nal_length){
7863                 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7867             if(memcmp(bitstream, out, COUNT)){
7868                 printf("mismatch\n");
7874     printf("Testing RBSP\n");
/**
 * Codec close callback: release all decoder-owned allocations —
 * both rbsp unescape buffers, the per-picture tables, and every cached
 * SPS/PPS. (Return statement elided in this excerpt.)
 */
7882 static av_cold int decode_end(AVCodecContext *avctx)
7884     H264Context *h = avctx->priv_data;
7885     MpegEncContext *s = &h->s;
7888     av_freep(&h->rbsp_buffer[0]);
7889     av_freep(&h->rbsp_buffer[1]);
7890     free_tables(h); //FIXME cleanup init stuff perhaps
 /* av_freep also NULLs the slot, so stale SPS/PPS pointers cannot dangle. */
7892     for(i = 0; i < MAX_SPS_COUNT; i++)
7893         av_freep(h->sps_buffers + i);
7895     for(i = 0; i < MAX_PPS_COUNT; i++)
7896         av_freep(h->pps_buffers + i);
7900 //    memset(h, 0, sizeof(H264Context));
/* Public AVCodec registration for the H.264 decoder. Most fields (name,
 * type, id, init/close/decode callbacks) are elided in this excerpt;
 * CODEC_CAP_DELAY matches the delayed-picture reordering in decode_frame. */
7906 AVCodec h264_decoder = {
7910     sizeof(H264Context),
7915     /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7917     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),