2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
#define DELAYED_PIC_REF 4

/* CAVLC lookup tables; backing storage is statically sized so the VLCs can
 * be built once into fixed arrays (sizes match the per-table entry counts). */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* forward declarations for functions defined later in this file */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* qp%6 for qp in 0..51 — used to index the dequant scale tables */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,

/* qp/6 for qp in 0..51 — used as the dequant shift */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,

/* per-MBAFF-configuration remapping of the left neighbour's 4x4 sub-block
 * indices (initializer rows not visible in this extract) */
static const int left_block_options[4][8]={
/* Populate the per-macroblock neighbour caches (intra pred modes, non-zero
 * counts, mv/ref caches, mvd, direct flags, cbp) from the frame-wide tables,
 * resolving the MBAFF frame/field neighbour remapping.
 * NOTE(review): this extract is truncated — several branches, conditions and
 * closing braces of the original function are not visible here. */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;  /* -1: default bottom-right partition of the top-left MB */

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* default (non-MBAFF) neighbour addressing */
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];

    /* MBAFF: neighbours depend on the frame/field coding of each MB pair */
    const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy = pair_xy - s->mb_stride;
    const int topleft_pair_xy = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
        /* condition fragments below belong to truncated if(...) headers */
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
        top_xy -= s->mb_stride;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
        topleft_xy -= s->mb_stride;
    } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
        topleft_xy += s->mb_stride;
        // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
        topleft_partition = 0;
        ? !curr_mb_frame_flag // bottom macroblock
        : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
        topright_xy -= s->mb_stride;
    if (left_mb_frame_flag != curr_mb_frame_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_frame_flag) {
            left_block = left_block_options[1];
            left_block= left_block_options[2];
            left_xy[1] += s->mb_stride;
            left_block = left_block_options[3];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

    /* deblocking path: 0xFFFF in slice_table marks an unavailable MB */
    top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(MB_MBAFF && !IS_INTRA(mb_type)){
        for(list=0; list<h->list_count; list++){
            //These values where changed for ease of performing MC, we need to change them back
            //FIXME maybe we can make MC and loop filter use the same values or prevent
            //the MC code from changing ref_cache and rather use a temporary array.
            if(USES_LIST(mb_type,list)){
                int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;

    /* decoding path: a neighbour is usable only if it is in the same slice */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* with constrained_intra_pred only intra neighbours count as available */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        /* intra4x4 prediction mode cache (top row + left column) */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!(top_type & type_mask))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!(left_type[i] & type_mask))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    /* non-zero count cache (top row, then left column below) */
    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

        /* top unavailable: CABAC wants 0, CAVLC wants 64 as "unavailable" marker */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=
        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=
        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* CABAC: neighbour coded-block-pattern context */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    /* inter: fill the motion vector and reference index caches */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* topleft/topright only needed for non-spatial direct / MBAFF */
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            /* B slices: direct-mode flags of the neighbours */
            if(h->slice_type_nos == FF_B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            /* MBAFF: rescale cached mvs/refs when neighbour field/frame coding differs */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

            /* frame MB with field neighbour: ref doubles, vertical mv halves */
            #define MAP_F2F(idx, mb_type)\
                if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] <<= 1;\
                    h->mv_cache[list][idx][1] /= 2;\
                    h->mvd_cache[list][idx][1] /= 2;\

            /* field MB with frame neighbour: ref halves, vertical mv doubles */
            #define MAP_F2F(idx, mb_type)\
                if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] >>= 1;\
                    h->mv_cache[list][idx][1] <<= 1;\
                    h->mvd_cache[list][idx][1] <<= 1;\

    /* count how many neighbours use the 8x8 DCT (for transform-size context) */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* per-mode replacement when the top/left neighbour is unavailable:
     * -1 keeps the requested mode, other values substitute a fallback DC mode
     * (a remaining negative result signals an illegal bitstream request) */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    if(!(h->top_samples_available&0x8000)){
        int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    /* one availability bit per 4x4 row of the left edge */
    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        if(!(h->left_samples_available&mask[i])){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
            h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
} //FIXME cleanup like next
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* fallback 8x8/16x16 DC modes when the top/left neighbour is missing;
     * -1 keeps the requested mode */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if((h->left_samples_available&0x8080) != 0x8080){
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Fetch the "C" (top-right, or top-left fallback) motion vector predictor
 * for block i, handling the MBAFF field/frame neighbour rescaling.
 * Returns the reference index that goes with *C.
 * NOTE(review): several if(...) headers of this function are truncated in
 * this extract; the dangling condition fragments belong to them. */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    /* scan8[0]-2 is a scratch cache slot used to synthesize the rescaled MV */
    *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
    *C = h->mv_cache[list][scan8[0]-2];

    && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
        int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
        if(IS_INTERLACED(mb_types[topright_xy])){
/* Load the MV/ref at 4x4 position (X4,Y4) of the neighbouring picture grid,
 * applying MV_OP to the vertical component and REF_OP to the reference
 * index (field<->frame scaling). */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
            const int x4 = X4, y4 = Y4;\
            const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
            if(!USES_LIST(mb_type,list))\
                return LIST_NOT_USED;\
            mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
            h->mv_cache[list][scan8[0]-2][0] = mv[0];\
            h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
            return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

            SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

    if(topright_ref == PART_NOT_AVAILABLE
       && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
       && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
        && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
            SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
        && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
            // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
            SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        tprintf(s->avctx, "topright MV not available\n");
        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    /* count neighbours whose reference matches ours */
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        /* component-wise median of the three neighbour MVs */
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        }else if(top_ref==ref){
    /* only the left neighbour exists: take it verbatim */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* top partition: prefer the top neighbour if it uses the same reference */
    const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
    const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

    tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

    /* bottom partition: prefer the left neighbour */
    const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

    tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    //RARE: fall back to the generic median predictor
    pred_motion(h, n, 4, list, ref, mx, my);
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* left partition: prefer the left neighbour if it uses the same reference */
    const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    /* right partition: prefer the diagonal (top-right) neighbour */
    diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

    tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

    if(diagonal_ref == ref){
    //RARE: fall back to the generic median predictor
    pred_motion(h, n, 2, list, ref, mx, my);
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Precompute the temporal-direct dist_scale_factor tables for all list-0
 * references, plus the per-field variants used by MBAFF.
 * NOTE(review): the loop headers/braces around the field branch are
 * truncated in this extract. */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    /* current POC: bottom-field POC when decoding the bottom field */
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    for(field=0; field<2; field++){
        const int poc = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        /* field refs live at indices 16.. in the scale-factor helper */
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Build the map from the co-located (list1[0]) picture's reference indices
 * to the current slice's list0 indices, matching pictures by POC.
 * @param mbafi nonzero when filling the per-field MBAFF maps (entries 16..)
 * NOTE(review): inner braces/else of the POC matching are truncated here. */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            for(j=start; j<end; j++){
                /* pictures match when frame_num and field parity agree */
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                        map[list][old_ref] = cur_ref;
/**
 * Per-slice initialization for B-frame direct prediction: records the current
 * picture's reference counts/POCs (packed as 4*frame_num + reference&3) for
 * later use as a colocated picture, then builds the col->list0 maps via
 * fill_colmap() for both the frame case and each field parity.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
/* sidx selects the structure slot for the current parity; ref1sidx likewise for the colocated picture. */
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* Progressive frames: both field slots share the same counts and POCs. */
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
/* Maps are only needed for temporal direct in B slices. */
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors, reference indices and (sub-)macroblock types for a
 * B-direct macroblock, filling h->mv_cache / h->ref_cache / h->sub_mb_type.
 * Handles both spatial direct (h->direct_spatial_mv_pred) and temporal direct
 * prediction, including the frame/field (MBAFF, PAFF) colocated-position
 * remapping. The logic follows the H.264 spec's clause on direct prediction.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Locate the colocated macroblock in the list-1 reference, remapping mb_xy
 * when current and colocated pictures differ in frame/field coding. */
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
/* Pick the colocated field whose POC is closest to the current picture. */
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* A field MB maps onto a vertical pair of frame MBs. */
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
/* Choose the direct partition size from the colocated MB type. */
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* Pointers into the colocated picture's motion vectors and ref indices. */
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* NOTE(review): guard condition for this bottom-half adjustment is elided
 * in this excerpt — presumably taken for the second field/MB of a pair. */
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes negative (unavailable) refs sort last */
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* No valid neighbor reference in either list: use ref 0 with zero MVs. */
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093 mv[list][0] = mv[list][1] = 0;
/* Drop the unused prediction list when only one has a valid reference. */
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
/* Spatial direct with frame/field mismatch: per-8x8 colocated test. */
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Colocated block is "stationary" (ref 0, |mv| <= 1): force zero MVs
 * per the spatial-direct rule. */
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* x264_build<=33 check works around an old x264 stationary-block bug. */
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* General spatial-direct case: per-8x8, optionally per-4x4 zeroing. */
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
/* Field MB in an MBAFF frame: switch to the per-field maps/factors. */
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* Temporal direct with frame/field mismatch: vertical MVs are rescaled
 * by y_shift to convert between frame and field coordinates. */
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* Intra colocated block: zero refs and MVs for this partition. */
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
/* mv = (DistScaleFactor * mv_col + 128) >> 8; list-1 MV is the residual. */
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* Temporal direct, per-8x8 (and per-4x4 when sub type is 4x4). */
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data from the decode-time caches
 * (h->mv_cache / h->mvd_cache / h->ref_cache) back into the picture-wide
 * arrays (current_picture.motion_val / ref_index, h->mvd_table,
 * h->direct_table) at this macroblock's position.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* List 0 unused: mark the 2x2 ref_index block as not used. */
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* Copy 4 MVs per row as two 64-bit stores per row of the 4x4 grid. */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the MV differences (mvd) for context modelling. */
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B slices with CABAC: record which 8x8 sub-blocks used direct mode. */
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header (ref_idc, unit_type) and removes 00 00 03
 * emulation-prevention bytes. When no escape bytes are present the input is
 * returned directly (zero-copy); otherwise the unescaped payload is written
 * into h->rbsp_buffer[bufidx]. */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* Scan even positions for a zero byte that could start 00 00 01/03. */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
/* Fast path: no escaped zeros found, return the input buffer as-is. */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
/* Separate buffer for data partition C so partitions don't clobber each other. */
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1433 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1436 *consumed= si + 1;//+1 for the header
1437 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1442 * identifies the exact end of the bitstream
1443 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit terminating the RBSP; body mostly elided here. */
1445 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1449 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1459 * IDCT transforms the 16 dc values and dequantizes them.
1460 * @param qp quantization parameter
/* Inverse 4x4 Hadamard transform of the 16 luma DC coefficients followed by
 * dequantization; the DC values live at the DC positions of the 16 4x4
 * blocks inside the macroblock-sized block[] array (hence the stride-16
 * x/y offset tables). */
1462 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1465 int temp[16]; //FIXME check if this is a good idea
1466 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1467 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1469 //memset(block, 64, 2*256);
/* Horizontal pass: butterflies over each row of DC terms into temp[]. */
1472 const int offset= y_offset[i];
1473 const int z0= block[offset+stride*0] + block[offset+stride*4];
1474 const int z1= block[offset+stride*0] - block[offset+stride*4];
1475 const int z2= block[offset+stride*1] - block[offset+stride*5];
1476 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical pass: butterflies over columns, then dequantize with rounding. */
1485 const int offset= x_offset[i];
1486 const int z0= temp[4*0+i] + temp[4*2+i];
1487 const int z1= temp[4*0+i] - temp[4*2+i];
1488 const int z2= temp[4*1+i] - temp[4*3+i];
1489 const int z3= temp[4*1+i] + temp[4*3+i];
1491 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1492 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1493 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1494 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1500 * DCT transforms the 16 dc values.
1501 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * same DC layout as the dequant/idct counterpart, result scaled by >>1. */
1503 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1504 // const int qmul= dequant_coeff[qp][0];
1506 int temp[16]; //FIXME check if this is a good idea
1507 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1508 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Horizontal butterfly pass into temp[]. */
1511 const int offset= y_offset[i];
1512 const int z0= block[offset+stride*0] + block[offset+stride*4];
1513 const int z1= block[offset+stride*0] - block[offset+stride*4];
1514 const int z2= block[offset+stride*1] - block[offset+stride*5];
1515 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterfly pass, halving the result. */
1524 const int offset= x_offset[i];
1525 const int z0= temp[4*0+i] + temp[4*2+i];
1526 const int z1= temp[4*0+i] - temp[4*2+i];
1527 const int z2= temp[4*1+i] - temp[4*3+i];
1528 const int z3= temp[4*1+i] + temp[4*3+i];
1530 block[stride*0 +offset]= (z0 + z3)>>1;
1531 block[stride*2 +offset]= (z1 + z2)>>1;
1532 block[stride*8 +offset]= (z1 - z2)>>1;
1533 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, stored at the DC positions of the four chroma 4x4 blocks. */
1541 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1542 const int stride= 16*2;
1543 const int xStride= 16;
1546 a= block[stride*0 + xStride*0];
1547 b= block[stride*0 + xStride*1];
1548 c= block[stride*1 + xStride*0];
1549 d= block[stride*1 + xStride*1];
/* NOTE(review): 'e' is computed on a line elided here — presumably a sum/
 * difference intermediate of a and d; confirm against upstream source. */
1556 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1557 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1558 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1559 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the chroma DC values (encoder side). */
1563 static void chroma_dc_dct_c(DCTELEM *block){
1564 const int stride= 16*2;
1565 const int xStride= 16;
1568 a= block[stride*0 + xStride*0];
1569 b= block[stride*0 + xStride*1];
1570 c= block[stride*1 + xStride*0];
1571 d= block[stride*1 + xStride*1];
/* NOTE(review): 'e' is assigned on a line elided in this excerpt. */
1578 block[stride*0 + xStride*0]= (a+c);
1579 block[stride*0 + xStride*1]= (e+b);
1580 block[stride*1 + xStride*0]= (a-c);
1581 block[stride*1 + xStride*1]= (e-b);
1586 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS lookup table; t selects
 * the Cb/Cr table (separate chroma_qp_index_offsets). */
1588 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1589 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition in one direction (one list):
 * computes the quarter-pel luma / eighth-pel chroma source position from
 * mv_cache, falls back to ff_emulated_edge_mc when the reference area
 * crosses the picture edge, then applies the qpel and chroma MC functions.
 */
1592 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1593 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1594 int src_x_offset, int src_y_offset,
1595 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1596 MpegEncContext * const s = &h->s;
1597 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1598 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1599 const int luma_xy= (mx&3) + ((my&3)<<2);
1600 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1601 uint8_t * src_cb, * src_cr;
1602 int extra_width= h->emu_edge_width;
1603 int extra_height= h->emu_edge_height;
1605 const int full_mx= mx>>2;
1606 const int full_my= my>>2;
1607 const int pic_width = 16*s->mb_width;
1608 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* Sub-pel interpolation reads 3 extra pixels; shrink the safe margin. */
1610 if(mx&7) extra_width -= 3;
1611 if(my&7) extra_height -= 3;
/* Reference block (partly) outside the picture: use the edge emulator. */
1613 if( full_mx < 0-extra_width
1614 || full_my < 0-extra_height
1615 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1616 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1617 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1618 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1622 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1624 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1627 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1630 // chroma offset when predicting from a field of opposite parity
1631 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1632 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1634 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1635 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1639 src_cb= s->edge_emu_buffer;
1641 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1644 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1645 src_cr= s->edge_emu_buffer;
1647 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for a partition: predicts from list 0
 * and/or list 1 via mc_dir_part. When both lists are used, the second
 * prediction is averaged with the first (op switched from put to avg).
 */
1650 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1651 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1652 int x_offset, int y_offset,
1653 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1654 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1655 int list0, int list1){
1656 MpegEncContext * const s = &h->s;
1657 qpel_mc_func *qpix_op= qpix_put;
1658 h264_chroma_mc_func chroma_op= chroma_put;
/* Advance destinations to this partition; offsets are in chroma pixels. */
1660 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1661 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1662 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1663 x_offset += 8*s->mb_x;
1664 y_offset += 8*(s->mb_y >> MB_FIELD);
1667 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1668 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1669 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1670 qpix_op, chroma_op);
/* Second prediction averages into the first. */
1673 chroma_op= chroma_avg;
1677 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for a partition. Bi-directional
 * case renders list 1 into a scratchpad and blends with the biweight
 * functions (implicit weights when use_weight==2, explicit otherwise);
 * uni-directional case applies the per-list explicit weight/offset in place.
 */
1684 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1685 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1686 int x_offset, int y_offset,
1687 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1688 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1689 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1690 int list0, int list1){
1691 MpegEncContext * const s = &h->s;
1693 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1694 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1695 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1696 x_offset += 8*s->mb_x;
1697 y_offset += 8*(s->mb_y >> MB_FIELD);
1700 /* don't optimize for luma-only case, since B-frames usually
1701 * use implicit weights => chroma too. */
1702 uint8_t *tmp_cb = s->obmc_scratchpad;
1703 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1704 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1705 int refn0 = h->ref_cache[0][ scan8[n] ];
1706 int refn1 = h->ref_cache[1][ scan8[n] ];
/* List 0 directly into dest, list 1 into the scratchpad for blending. */
1708 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1709 dest_y, dest_cb, dest_cr,
1710 x_offset, y_offset, qpix_put, chroma_put);
1711 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1712 tmp_y, tmp_cb, tmp_cr,
1713 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: weights sum to 64, log2 denom 5. */
1715 if(h->use_weight == 2){
1716 int weight0 = h->implicit_weight[refn0][refn1];
1717 int weight1 = 64 - weight0;
1718 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1719 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1722 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1723 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1724 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1725 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1726 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1727 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1729 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1730 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Uni-directional: predict, then weight in place. */
1733 int list = list1 ? 1 : 0;
1734 int refn = h->ref_cache[list][ scan8[n] ];
1735 Picture *ref= &h->ref_list[list][refn];
1736 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1737 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1738 qpix_put, chroma_put);
1740 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1742 if(h->use_weight_chroma){
1743 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1745 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches a partition to weighted or standard motion compensation:
 * weighted when explicit weights are active (use_weight==1), or when
 * implicit weighting is active and this ref pair's weight is not the
 * trivial 32/32 split (which plain averaging already produces).
 */
1751 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1757 int list0, int list1){
1758 if((h->use_weight==2 && list0 && list1
1759 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1760 || h->use_weight==1)
1761 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1762 x_offset, y_offset, qpix_put, chroma_put,
1763 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1765 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1766 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1769 static inline void prefetch_motion(H264Context *h, int list){
1770 /* fetch pixels for estimated mv 4 macroblocks ahead
1771 * optimized for 64byte cache lines */
1772 MpegEncContext * const s = &h->s;
1773 const int refn = h->ref_cache[list][scan8[0]];
/* Estimated source position: current MV, 4 MBs (64 luma pixels) ahead. */
1775 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1776 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1777 uint8_t **src= h->ref_list[list][refn].data;
1778 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1779 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr planes prefetched together; stride = distance between planes. */
1780 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1781 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level inter prediction for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and issues mc_part for
 * each, selecting per-size qpel/chroma MC functions and weight functions.
 * Prefetches the next list-0 reference before and list-1 after.
 */
1785 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1786 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1787 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1788 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1789 MpegEncContext * const s = &h->s;
1790 const int mb_xy= h->mb_xy;
1791 const int mb_type= s->current_picture.mb_type[mb_xy];
1793 assert(IS_INTER(mb_type));
1795 prefetch_motion(h, 0);
1797 if(IS_16X16(mb_type)){
1798 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1799 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1800 &weight_op[0], &weight_avg[0],
1801 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1802 }else if(IS_16X8(mb_type)){
1803 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1804 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1805 &weight_op[1], &weight_avg[1],
1806 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1807 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1808 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1809 &weight_op[1], &weight_avg[1],
1810 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1811 }else if(IS_8X16(mb_type)){
1812 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1813 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1814 &weight_op[2], &weight_avg[2],
1815 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1816 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1817 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1818 &weight_op[2], &weight_avg[2],
1819 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 partitions: each may be further split per sub_mb_type. */
1823 assert(IS_8X8(mb_type));
1826 const int sub_mb_type= h->sub_mb_type[i];
1828 int x_offset= (i&1)<<2;
1829 int y_offset= (i&2)<<1;
1831 if(IS_SUB_8X8(sub_mb_type)){
1832 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1834 &weight_op[3], &weight_avg[3],
1835 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1836 }else if(IS_SUB_8X4(sub_mb_type)){
1837 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1838 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1839 &weight_op[4], &weight_avg[4],
1840 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1841 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1842 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1843 &weight_op[4], &weight_avg[4],
1844 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1845 }else if(IS_SUB_4X8(sub_mb_type)){
1846 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1847 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1848 &weight_op[5], &weight_avg[5],
1849 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1850 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1851 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1852 &weight_op[5], &weight_avg[5],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856 assert(IS_SUB_4X4(sub_mb_type));
1858 int sub_x_offset= x_offset + 2*(j&1);
1859 int sub_y_offset= y_offset + (j&2);
1860 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[6], &weight_avg[6],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1869 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run, and their chroma-DC variants), using the statically allocated
 * VLC_TYPE arrays declared at file scope with INIT_VLC_USE_NEW_STATIC.
 */
1872 static av_cold void decode_init_vlc(void){
1873 static int done = 0;
1880 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1881 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1882 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1883 &chroma_dc_coeff_token_len [0], 1, 1,
1884 &chroma_dc_coeff_token_bits[0], 1, 1,
1885 INIT_VLC_USE_NEW_STATIC);
/* The four coeff_token tables share one packed static array; offset walks it. */
1889 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1890 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1891 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1892 &coeff_token_len [i][0], 1, 1,
1893 &coeff_token_bits[i][0], 1, 1,
1894 INIT_VLC_USE_NEW_STATIC);
1895 offset += coeff_token_vlc_tables_size[i];
1898 * This is a one time safety check to make sure that
1899 * the packed static coeff_token_vlc table sizes
1900 * were initialized correctly.
1902 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1905 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1906 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1907 init_vlc(&chroma_dc_total_zeros_vlc[i],
1908 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1909 &chroma_dc_total_zeros_len [i][0], 1, 1,
1910 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1911 INIT_VLC_USE_NEW_STATIC);
1913 for(i=0; i<15; i++){
1914 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1915 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1916 init_vlc(&total_zeros_vlc[i],
1917 TOTAL_ZEROS_VLC_BITS, 16,
1918 &total_zeros_len [i][0], 1, 1,
1919 &total_zeros_bits[i][0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
1924 run_vlc[i].table = run_vlc_tables[i];
1925 run_vlc[i].table_allocated = run_vlc_tables_size;
1926 init_vlc(&run_vlc[i],
1928 &run_len [i][0], 1, 1,
1929 &run_bits[i][0], 1, 1,
1930 INIT_VLC_USE_NEW_STATIC);
/* run7 covers runs for zeros_left > 6 and has its own larger table. */
1932 run7_vlc.table = run7_vlc_table,
1933 run7_vlc.table_allocated = run7_vlc_table_size;
1934 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1935 &run_len [6][0], 1, 1,
1936 &run_bits[6][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
/*
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread top_borders and obmc_scratchpad buffers of
 * every slice-thread context.  av_freep() nulls each pointer, so calling
 * this on a partially-allocated context is safe.
 */
1941 static void free_tables(H264Context *h){
1944 av_freep(&h->intra4x4_pred_mode);
1945 av_freep(&h->chroma_pred_mode_table);
1946 av_freep(&h->cbp_table);
1947 av_freep(&h->mvd_table[0]);
1948 av_freep(&h->mvd_table[1]);
1949 av_freep(&h->direct_table);
1950 av_freep(&h->non_zero_count);
1951 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables), so only
 * the base is freed and the derived pointer is cleared */
1952 h->slice_table= NULL;
1954 av_freep(&h->mb2b_xy);
1955 av_freep(&h->mb2b8_xy);
/* per-thread buffers owned by each thread context */
1957 for(i = 0; i < h->s.avctx->thread_count; i++) {
1958 hx = h->thread_context[i];
1960 av_freep(&hx->top_borders[1]);
1961 av_freep(&hx->top_borders[0]);
1962 av_freep(&hx->s.obmc_scratchpad);
/*
 * Build the two 8x8 dequantization tables (intra/inter) for all 52 QP
 * values from the PPS scaling matrices.  If both scaling matrices are
 * identical, table 1 aliases table 0 instead of being recomputed.
 * The entries are transposed when the platform IDCT is not the C
 * reference implementation (its permutation differs) -- see the FIXME.
 */
1966 static void init_dequant8_coeff_table(H264Context *h){
1968 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1969 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1970 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1972 for(i=0; i<2; i++ ){
/* share table 0 when the inter matrix equals the intra matrix */
1973 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1974 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1978 for(q=0; q<52; q++){
/* per-QP shift: qp/6 (dequant scale doubles every 6 QP steps) */
1979 int shift = div6[q];
1982 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1983 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1984 h->pps.scaling_matrix8[i][x]) << shift;
/*
 * Build the six 4x4 dequantization tables (3 intra + 3 inter, one per
 * Y/Cb/Cr component) for all 52 QP values from the PPS scaling matrices.
 * Tables with identical scaling matrices are shared by pointer aliasing
 * rather than recomputed.  Entries are transposed for non-reference
 * IDCT implementations, mirroring init_dequant8_coeff_table().
 */
1989 static void init_dequant4_coeff_table(H264Context *h){
1991 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1992 for(i=0; i<6; i++ ){
1993 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* alias an earlier buffer when a previous matrix j matches matrix i */
1995 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1996 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2003 for(q=0; q<52; q++){
/* qp/6 plus 2 extra bits of precision for the 4x4 case */
2004 int shift = div6[q] + 2;
2007 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2008 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2009 h->pps.scaling_matrix4[i][x]) << shift;
/*
 * (Re)build all dequantization tables: always the 4x4 set, the 8x8 set
 * only when the PPS enables 8x8 transforms.  For lossless (transform
 * bypass) streams, QP 0 entries are forced to the identity scale 1<<6
 * so dequantization becomes a no-op at that QP.
 */
2014 static void init_dequant_tables(H264Context *h){
2016 init_dequant4_coeff_table(h);
2017 if(h->pps.transform_8x8_mode)
2018 init_dequant8_coeff_table(h);
2019 if(h->sps.transform_bypass){
2022 h->dequant4_coeff[i][0][x] = 1<<6;
2023 if(h->pps.transform_8x8_mode)
2026 h->dequant8_coeff[i][0][x] = 1<<6;
2033 * needs width/height
/*
 * Allocate the per-sequence macroblock tables shared by all slice
 * threads (prediction modes, non-zero counts, slice table, CBP, MVD,
 * direct-mode flags, and the mb->b/b8 index maps).  Returns 0 on
 * success; CHECKED_ALLOCZ jumps to an error path (not visible here)
 * on allocation failure.  big_mb_num includes one extra mb row of
 * padding, and slice_table gets an extra stride for edge guards.
 */
2035 static int alloc_tables(H264Context *h){
2036 MpegEncContext * const s = &h->s;
2037 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2040 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2042 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2044 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2046 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2047 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2048 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the base so that
 * out-of-frame neighbour accesses above/left hit the guard band */
2051 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2052 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2054 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2055 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* precompute mb address -> 4x4-block and 8x8-block address maps */
2056 for(y=0; y<s->mb_height; y++){
2057 for(x=0; x<s->mb_width; x++){
2058 const int mb_xy= x + y*s->mb_stride;
2059 const int b_xy = 4*x + 4*y*h->b_stride;
2060 const int b8_xy= 2*x + 2*y*h->b8_stride;
2062 h->mb2b_xy [mb_xy]= b_xy;
2063 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() because linesize is unknown here */
2067 s->obmc_scratchpad = NULL;
2069 if(!h->dequant4_coeff[0])
2070 init_dequant_tables(h);
2079 * Mimic alloc_tables(), but for every context thread.
/*
 * Share the read-mostly tables of `src` with a slice-thread context
 * `dst` by pointer copy (no duplication); only obmc_scratchpad stays
 * per-thread (allocated later in frame_start()).  The prediction
 * function table is re-initialized per context.
 */
2081 static void clone_tables(H264Context *dst, H264Context *src){
2082 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2083 dst->non_zero_count = src->non_zero_count;
2084 dst->slice_table = src->slice_table;
2085 dst->cbp_table = src->cbp_table;
2086 dst->mb2b_xy = src->mb2b_xy;
2087 dst->mb2b8_xy = src->mb2b8_xy;
2088 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2089 dst->mvd_table[0] = src->mvd_table[0];
2090 dst->mvd_table[1] = src->mvd_table[1];
2091 dst->direct_table = src->direct_table;
2093 dst->s.obmc_scratchpad = NULL;
2094 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2099 * Allocate buffers which are not shared amongst multiple threads.
/*
 * Per-thread setup: allocate the two top-border line buffers
 * (16 luma + 8 + 8 chroma bytes per macroblock column).  Returns 0 on
 * success, -1 on allocation failure (free_tables() cleans up).
 */
2101 static int context_init(H264Context *h){
2102 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2107 return -1; // free_tables will clean up for us
/*
 * Initialization shared by the decoder (and, historically, encoder):
 * copy dimensions/codec id from the AVCodecContext, set up intra
 * prediction and DSP function tables, and default the scaling matrices
 * to flat 16 (the spec's "no scaling" value).
 */
2110 static av_cold void common_init(H264Context *h){
2111 MpegEncContext * const s = &h->s;
2113 s->width = s->avctx->width;
2114 s->height = s->avctx->height;
2115 s->codec_id= s->avctx->codec->id;
2117 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet" sentinel for dequant table rebuilds */
2119 h->dequant_coeff_pps= -1;
2120 s->unrestricted_mv=1;
2121 s->decode=1; //FIXME
2123 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2125 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2126 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/*
 * AVCodec init callback: set MPV defaults and H.264-specific state,
 * pick the pixel format (full-range YUVJ420P for the SVQ3 variant),
 * and detect AVC ("mp4-style") extradata by its leading version byte 1.
 * NOTE(review): the extradata-handling body and the return statement
 * are not visible in this view of the file.
 */
2129 static av_cold int decode_init(AVCodecContext *avctx){
2130 H264Context *h= avctx->priv_data;
2131 MpegEncContext * const s = &h->s;
2133 MPV_decode_defaults(s);
2138 s->out_format = FMT_H264;
2139 s->workaround_bugs= avctx->workaround_bugs;
2142 // s->decode_mb= ff_h263_decode_mb;
2143 s->quarter_sample = 1;
2146 if(avctx->codec_id == CODEC_ID_SVQ3)
2147 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2149 avctx->pix_fmt= PIX_FMT_YUV420P;
/* AVCDecoderConfigurationRecord starts with configurationVersion == 1 */
2153 if(avctx->extradata_size > 0 && avctx->extradata &&
2154 *(char *)avctx->extradata == 1){
2161 h->thread_context[0] = h;
/* sentinels so the first real POC always compares as "newer" */
2162 h->outputed_poc = INT_MIN;
2163 h->prev_poc_msb= 1<<16;
/*
 * Per-frame setup: start the MPV frame and error-resilience state,
 * compute block pixel offsets for progressive (0..23) and MBAFF/field
 * (24..47) layouts, lazily allocate per-thread bipred scratchpads (needs
 * linesize, so it cannot live in alloc_tables()), and reset per-frame
 * picture state.  Returns 0 on success.
 * NOTE(review): the early-return on MPV_frame_start failure and the
 * final return are not visible in this view of the file.
 */
2167 static int frame_start(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2171 if(MPV_frame_start(s, s->avctx) < 0)
2173 ff_er_frame_start(s);
2175 * MPV_frame_start uses pict_type to derive key_frame.
2176 * This is incorrect for H.264; IDR markings must be used.
2177 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2178 * See decode_nal_units().
2180 s->current_picture_ptr->key_frame= 0;
2182 assert(s->linesize && s->uvlinesize);
/* luma block offsets: [0..15] frame layout, [24..39] field layout
 * (doubled line stride via the 8* factor) */
2184 for(i=0; i<16; i++){
2185 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2186 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets, Cb and Cr sharing the same table entries */
2189 h->block_offset[16+i]=
2190 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191 h->block_offset[24+16+i]=
2192 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2195 /* can't be in alloc_tables because linesize isn't known there.
2196 * FIXME: redo bipred weight to not require extra buffer? */
2197 for(i = 0; i < s->avctx->thread_count; i++)
2198 if(!h->thread_context[i]->s.obmc_scratchpad)
2199 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2201 /* some macroblocks will be accessed before they're available */
2202 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2203 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2205 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2207 // We mark the current picture as non-reference after allocating it, so
2208 // that if we break out due to an error it can be released automatically
2209 // in the next MPV_frame_start().
2210 // SVQ3 as well as most other codecs have only last/next/current and thus
2211 // get released even with set reference, besides SVQ3 and others do not
2212 // mark frames as reference later "naturally".
2213 if(s->codec_id != CODEC_ID_SVQ3)
2214 s->current_picture_ptr->reference= 0;
2216 s->current_picture_ptr->field_poc[0]=
2217 s->current_picture_ptr->field_poc[1]= INT_MAX;
2218 assert(s->current_picture_ptr->long_ref==0);
/*
 * Save the bottom row and right-edge columns of the just-decoded
 * macroblock into h->top_borders / h->left_border so the deblocking
 * filter and intra prediction of neighbouring MBs can read the
 * pre-filter pixels.  `simple` selects the fast non-MBAFF path; in the
 * MBAFF path two border lines are kept (one per field of the MB pair)
 * and the copy stride/offsets differ.
 * NOTE(review): several branch and loop-closing lines are not visible
 * in this view of the file.
 */
2223 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2224 MpegEncContext * const s = &h->s;
2233 src_cb -= uvlinesize;
2234 src_cr -= uvlinesize;
2236 if(!simple && FRAME_MBAFF){
/* MBAFF: interleaved storage; offsets differ for top/bottom MB of a pair */
2238 offset = MB_MBAFF ? 1 : 17;
2239 uvoffset= MB_MBAFF ? 1 : 9;
/* copy the MB's last luma row (16 bytes as two uint64 stores) */
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2242 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2243 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2245 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2250 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2251 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2252 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2253 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2259 top_idx = MB_MBAFF ? 0 : 1;
2261 step= MB_MBAFF ? 2 : 1;
2264 // There are two lines saved, the line above the the top macroblock of a pair,
2265 // and the line above the bottom macroblock
/* right-edge luma column for the left neighbour of the next MB */
2266 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2267 for(i=1; i<17 - skiplast; i++){
2268 h->left_border[offset+i*step]= src_y[15+i* linesize];
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2272 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2274 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2275 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2276 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2277 for(i=1; i<9 - skiplast; i++){
2278 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2279 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2282 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Swap (xchg=1) or restore (xchg=0) the saved top/left border pixels
 * with the current macroblock's edge pixels, so intra prediction sees
 * unfiltered neighbour samples while deblocking is enabled.  Used as a
 * bracketing pair around intra prediction in hl_decode_mb_internal().
 * With deblocking mode 2 the swap only applies within the same slice.
 * NOTE(review): the XCHG macro body (continuation lines of the #define)
 * and several closing braces are not visible in this view of the file.
 */
2286 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2287 MpegEncContext * const s = &h->s;
2298 if(!simple && FRAME_MBAFF){
2300 offset = MB_MBAFF ? 1 : 17;
2301 uvoffset= MB_MBAFF ? 1 : 9;
2305 top_idx = MB_MBAFF ? 0 : 1;
2307 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter does not cross slice boundaries, so only
 * treat same-slice neighbours as "present" */
2310 if(h->deblocking_filter == 2) {
2312 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2313 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2315 deblock_left = (s->mb_x > 0);
2316 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back to the row/column just outside the MB */
2319 src_y -= linesize + 1;
2320 src_cb -= uvlinesize + 1;
2321 src_cr -= uvlinesize + 1;
2323 #define XCHG(a,b,t,xchg)\
2330 for(i = !deblock_top; i<16; i++){
2331 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2333 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2338 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2339 if(s->mb_x+1 < s->mb_width){
2340 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2344 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2346 for(i = !deblock_top; i<8; i++){
2347 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2348 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2350 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2351 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2355 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * Decode/reconstruct one macroblock: intra prediction or inter motion
 * compensation, inverse transform + residual add for luma and chroma,
 * then deblocking bookkeeping.  `simple` is a compile-time flag (the
 * function is av_always_inline and called with constant 0/1) that
 * strips the MBAFF/field, IPCM, gray and SVQ3 paths from the fast
 * progressive-H.264 variant.
 * NOTE(review): many else/brace/loop-header lines are not visible in
 * this view of the file; comments describe visible statements only.
 */
2360 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2361 MpegEncContext * const s = &h->s;
2362 const int mb_x= s->mb_x;
2363 const int mb_y= s->mb_y;
2364 const int mb_xy= h->mb_xy;
2365 const int mb_type= s->current_picture.mb_type[mb_xy];
2366 uint8_t *dest_y, *dest_cb, *dest_cr;
2367 int linesize, uvlinesize /*dct_offset*/;
2369 int *block_offset = &h->block_offset[0];
2370 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2371 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2372 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2373 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers into the current picture planes */
2375 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2376 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2377 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2379 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2380 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the stride and use the field block offsets */
2382 if (!simple && MB_FIELD) {
2383 linesize = h->mb_linesize = s->linesize * 2;
2384 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2385 block_offset = &h->block_offset[24];
2386 if(mb_y&1){ //FIXME move out of this function?
2387 dest_y -= s->linesize*15;
2388 dest_cb-= s->uvlinesize*7;
2389 dest_cr-= s->uvlinesize*7;
/* retag ref_cache entries with field parity for MBAFF MC */
2393 for(list=0; list<h->list_count; list++){
2394 if(!USES_LIST(mb_type, list))
2396 if(IS_16X16(mb_type)){
2397 int8_t *ref = &h->ref_cache[list][scan8[0]];
2398 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2400 for(i=0; i<16; i+=4){
2401 int ref = h->ref_cache[list][scan8[i]];
2403 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2409 linesize = h->mb_linesize = s->linesize;
2410 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2411 // dct_offset = s->linesize * 16;
/* IPCM: raw samples were stored in h->mb; copy them out verbatim */
2414 if (!simple && IS_INTRA_PCM(mb_type)) {
2415 for (i=0; i<16; i++) {
2416 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2418 for (i=0; i<8; i++) {
2419 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2420 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2423 if(IS_INTRA(mb_type)){
/* expose unfiltered neighbour pixels to intra prediction */
2424 if(h->deblocking_filter)
2425 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2427 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2428 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2429 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2432 if(IS_INTRA4x4(mb_type)){
2433 if(simple || !s->encoding){
/* 8x8 transform intra: predict then add residual per 8x8 block */
2434 if(IS_8x8DCT(mb_type)){
2435 if(transform_bypass){
2437 idct_add = s->dsp.add_pixels8;
2439 idct_dc_add = s->dsp.h264_idct8_dc_add;
2440 idct_add = s->dsp.h264_idct8_add;
2442 for(i=0; i<16; i+=4){
2443 uint8_t * const ptr= dest_y + block_offset[i];
2444 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* High 4:4:4 lossless: fused prediction+residual add */
2445 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2446 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2448 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2449 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2450 (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only residual gets the cheaper dc_add */
2452 if(nnz == 1 && h->mb[i*16])
2453 idct_dc_add(ptr, h->mb + i*16, linesize);
2455 idct_add (ptr, h->mb + i*16, linesize);
/* 4x4 transform intra */
2460 if(transform_bypass){
2462 idct_add = s->dsp.add_pixels4;
2464 idct_dc_add = s->dsp.h264_idct_dc_add;
2465 idct_add = s->dsp.h264_idct_add;
2467 for(i=0; i<16; i++){
2468 uint8_t * const ptr= dest_y + block_offset[i];
2469 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2471 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2472 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* emulate unavailable top-right samples by replicating the last
 * available top pixel across the 4 bytes */
2476 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2477 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2478 assert(mb_y || linesize <= block_offset[i]);
2479 if(!topright_avail){
2480 tr= ptr[3 - linesize]*0x01010101;
2481 topright= (uint8_t*) &tr;
2483 topright= ptr + 4 - linesize;
2487 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2488 nnz = h->non_zero_count_cache[ scan8[i] ];
2491 if(nnz == 1 && h->mb[i*16])
2492 idct_dc_add(ptr, h->mb + i*16, linesize);
2494 idct_add (ptr, h->mb + i*16, linesize);
2496 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: full-MB prediction, then DC dequant/transform */
2503 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2505 if(!transform_bypass)
2506 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2508 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* restore the (to-be-filtered) border pixels */
2510 if(h->deblocking_filter)
2511 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter MB: motion compensation */
2513 hl_motion(h, dest_y, dest_cb, dest_cr,
2514 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2515 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2516 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-intra4x4 MBs */
2520 if(!IS_INTRA4x4(mb_type)){
2522 if(IS_INTRA16x16(mb_type)){
2523 if(transform_bypass){
2524 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2525 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2527 for(i=0; i<16; i++){
2528 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2529 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2533 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2535 }else if(h->cbp&15){
2536 if(transform_bypass){
2537 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2538 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2539 for(i=0; i<16; i+=di){
2540 if(h->non_zero_count_cache[ scan8[i] ]){
2541 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2545 if(IS_8x8DCT(mb_type)){
2546 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2548 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 luma residual path */
2553 for(i=0; i<16; i++){
2554 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2555 uint8_t * const ptr= dest_y + block_offset[i];
2556 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5 flag chroma coefficients) */
2562 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2563 uint8_t *dest[2] = {dest_cb, dest_cr};
2564 if(transform_bypass){
2565 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2566 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2567 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2569 idct_add = s->dsp.add_pixels4;
2570 for(i=16; i<16+8; i++){
2571 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2572 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses separate intra/inter dequant tables and per-plane QP */
2576 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2577 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2579 idct_add = s->dsp.h264_idct_add;
2580 idct_dc_add = s->dsp.h264_idct_dc_add;
2581 for(i=16; i<16+8; i++){
2582 if(h->non_zero_count_cache[ scan8[i] ])
2583 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584 else if(h->mb[i*16])
2585 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2588 for(i=16; i<16+8; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2590 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2591 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* clear coefficients for the next MB only if any were written */
2598 if(h->cbp || IS_INTRA(mb_type))
2599 s->dsp.clear_blocks(h->mb);
2601 if(h->deblocking_filter) {
2602 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2603 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2604 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2605 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2606 if (!simple && FRAME_MBAFF) {
2607 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2609 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2615 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1: hl_decode_mb_internal is always-inlined, so the compiler
 * strips the MBAFF/IPCM/SVQ3/gray branches from this specialization. */
2617 static void hl_decode_mb_simple(H264Context *h){
2618 hl_decode_mb_internal(h, 1);
2622 * Process a macroblock; this handles edge cases, such as interlacing.
/* simple=0 specialization; av_noinline keeps this cold path out of the
 * fast one's code size. */
2624 static void av_noinline hl_decode_mb_complex(H264Context *h){
2625 hl_decode_mb_internal(h, 0);
/*
 * Dispatch macroblock reconstruction to the simple or complex variant.
 * The complex path is taken for small builds, interlaced/field content
 * (h->is_complex), IPCM macroblocks, or lossless (qscale==0) streams.
 * Skipped entirely when the encoder build has decoding disabled.
 */
2628 static void hl_decode_mb(H264Context *h){
2629 MpegEncContext * const s = &h->s;
2630 const int mb_xy= h->mb_xy;
2631 const int mb_type= s->current_picture.mb_type[mb_xy];
2632 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2634 if(ENABLE_H264_ENCODER && !s->decode)
2638 hl_decode_mb_complex(h);
2639 else hl_decode_mb_simple(h);
/*
 * Convert a frame Picture into a single-field view in place: double the
 * plane strides, offset the data pointers by one line for the bottom
 * field, restrict .reference to the given parity, and pick the matching
 * field POC as the picture's POC.
 */
2642 static void pic_as_field(Picture *pic, const int parity){
2644 for (i = 0; i < 4; ++i) {
2645 if (parity == PICT_BOTTOM_FIELD)
2646 pic->data[i] += pic->linesize[i];
2647 pic->reference = parity;
2648 pic->linesize[i] *= 2;
2650 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/*
 * Copy `src` into `dest` if it is referenced with the requested parity;
 * for field parities the copy is converted to a field view and id_add
 * is added to its pic_id.  Returns non-zero iff the copy was made.
 * NOTE(review): the copy statement and return are on lines not visible
 * in this view of the file.
 */
2653 static int split_field_copy(Picture *dest, Picture *src,
2654 int parity, int id_add){
2655 int match = !!(src->reference & parity);
2659 if(parity != PICT_FRAME){
2660 pic_as_field(dest, parity);
2662 dest->pic_id += id_add;
/*
 * Build part of a default reference list from the candidate array `in`,
 * alternating between pictures referenced with field parity `sel` and
 * the opposite parity (sel^3), as required for field reference list
 * construction.  pic_id is the long-term index for long refs or
 * frame_num for short refs.  Returns the number of entries written.
 */
2669 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0]/i[1] scan for the next candidate of each parity */
2673 while(i[0]<len || i[1]<len){
2674 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2676 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2679 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2680 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2683 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2684 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/*
 * Selection-sort the pictures in `src` whose POC lies beyond `limit`
 * into `sorted`, ascending when dir==0 and descending when dir==1.
 * Used to order short-term references around the current POC for
 * B-slice default list construction.  Returns the count written.
 */
2691 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2696 best_poc= dir ? INT_MIN : INT_MAX;
2698 for(i=0; i<len; i++){
2699 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the descending pass */
2700 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2702 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate found this round; stop */
2705 if(best_poc == (dir ? INT_MIN : INT_MAX))
2707 limit= sorted[out_i++]->poc - dir;
2713 * fills the default_ref_list.
/*
 * Build the default (pre-reordering) reference picture lists per
 * H.264 8.2.4.2: for B slices, list0 sorts short-term refs by POC
 * below/above the current POC (list1 mirrored), then long-term refs;
 * for P slices, short-term refs in marking order then long-term refs.
 * Unused tail entries are zeroed.
 */
2715 static int fill_default_ref_list(H264Context *h){
2716 MpegEncContext * const s = &h->s;
2719 if(h->slice_type_nos==FF_B_TYPE){
2720 Picture *sorted[32];
/* field pictures compare against the POC of the current field */
2725 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2727 cur_poc= s->current_picture_ptr->poc;
2729 for(list= 0; list<2; list++){
2730 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2731 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2733 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2734 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2737 if(len < h->ref_count[list])
2738 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 would equal list0, swap its first two entries.
 * NOTE(review): the condition reads data[0] before checking i<lens[0];
 * if all entries match, the final iteration dereferences one past the
 * filled range -- the bound check should come first. */
2742 if(lens[0] == lens[1] && lens[1] > 1){
2743 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2745 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2748 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2749 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2751 if(len < h->ref_count[0])
2752 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* debug dump of the constructed lists (tprintf is a no-op unless
 * trace logging is enabled) */
2755 for (i=0; i<h->ref_count[0]; i++) {
2756 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2758 if(h->slice_type_nos==FF_B_TYPE){
2759 for (i=0; i<h->ref_count[1]; i++) {
2760 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2767 static void print_short_term(H264Context *h);
2768 static void print_long_term(H264Context *h);
2771 * Extract structure information about the picture described by pic_num in
2772 * the current decoding context (frame or field). Note that pic_num is
2773 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2774 * @param pic_num picture number for which to extract structure information
2775 * @param structure one of PICT_XXX describing structure of picture
2777 * @return frame number (short term) or long term index of picture
2778 * described by pic_num
/* NOTE(review): the parity test on pic_num and the return expression are
 * on lines not visible in this view of the file. */
2780 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2781 MpegEncContext * const s = &h->s;
/* default: same structure as the current picture */
2783 *structure = s->picture_structure;
2786 /* opposite field */
2787 *structure ^= PICT_FRAME;
/*
 * Parse ref_pic_list_reordering() (H.264 7.3.3.1) and apply it: start
 * from the default lists, then for each reordering command locate the
 * requested short-term (by pic_num difference) or long-term (by index)
 * picture and move it to the current list position, shifting the rest.
 * Missing references are zero-filled / substituted with the current
 * picture as a last resort.  Returns 0 on success, -1 on bitstream
 * errors (some error-return lines are not visible in this view).
 */
2794 static int decode_ref_pic_list_reordering(H264Context *h){
2795 MpegEncContext * const s = &h->s;
2796 int list, index, pic_structure;
2798 print_short_term(h);
2801 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering edits it in place */
2802 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2804 if(get_bits1(&s->gb)){
2805 int pred= h->curr_pic_num;
2807 for(index=0; ; index++){
2808 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2809 unsigned int pic_id;
2811 Picture *ref = NULL;
/* idc 3 terminates the reordering loop for this list */
2813 if(reordering_of_pic_nums_idc==3)
2816 if(index >= h->ref_count[list]){
2817 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2821 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term ref addressed by abs_diff_pic_num */
2822 if(reordering_of_pic_nums_idc<2){
2823 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2826 if(abs_diff_pic_num > h->max_pic_num){
2827 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2831 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2832 else pred+= abs_diff_pic_num;
2833 pred &= h->max_pic_num - 1;
2835 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term list newest-first for a matching frame_num
 * with the required field parity */
2837 for(i= h->short_ref_count-1; i>=0; i--){
2838 ref = h->short_ref[i];
2839 assert(ref->reference);
2840 assert(!ref->long_ref);
2842 ref->frame_num == frame_num &&
2843 (ref->reference & pic_structure)
/* idc 2: long-term ref addressed by long_term_pic_idx */
2851 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2853 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2856 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2859 ref = h->long_ref[long_idx];
2860 assert(!(ref && !ref->reference));
2861 if(ref && (ref->reference & pic_structure)){
2862 ref->pic_id= pic_id;
2863 assert(ref->long_ref);
2871 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2872 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert `ref` at `index`: find its current slot (or list end),
 * shift intervening entries down, then write the copy */
2874 for(i=index; i+1<h->ref_count[list]; i++){
2875 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2878 for(; i > index; i--){
2879 h->ref_list[list][i]= h->ref_list[list][i-1];
2881 h->ref_list[list][index]= *ref;
2883 pic_as_field(&h->ref_list[list][index], pic_structure);
2887 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: no list entry may be empty */
2893 for(list=0; list<h->list_count; list++){
2894 for(index= 0; index < h->ref_count[list]; index++){
2895 if(!h->ref_list[list][index].data[0]){
2896 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2897 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/*
 * For MBAFF frames, populate ref_list entries 16..16+2*n with per-field
 * views of the frame references in entries 0..n-1 (top field at
 * 16+2*i, bottom at 16+2*i+1), and duplicate the explicit and implicit
 * weighted-prediction coefficients for those field entries.
 * NOTE(review): the frame->field copy and several loop headers are on
 * lines not visible in this view of the file.
 */
2905 static void fill_mbaff_ref_list(H264Context *h){
2907 for(list=0; list<2; list++){ //FIXME try list_count
2908 for(i=0; i<h->ref_count[list]; i++){
2909 Picture *frame = &h->ref_list[list][i];
2910 Picture *field = &h->ref_list[list][16+2*i];
2913 field[0].linesize[j] <<= 1;
2914 field[0].reference = PICT_TOP_FIELD;
2915 field[0].poc= field[0].field_poc[0];
/* bottom field = top-field view shifted down one line per plane */
2916 field[1] = field[0];
2918 field[1].data[j] += frame->linesize[j];
2919 field[1].reference = PICT_BOTTOM_FIELD;
2920 field[1].poc= field[1].field_poc[1];
/* both fields inherit the frame's explicit weights/offsets */
2922 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2923 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2925 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2926 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* replicate implicit bipred weights along both list axes */
2930 for(j=0; j<h->ref_count[1]; j++){
2931 for(i=0; i<h->ref_count[0]; i++)
2932 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2933 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2934 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/*
 * Parse pred_weight_table() (H.264 7.3.3.2): explicit weighted
 * prediction denominators plus per-reference luma/chroma weights and
 * offsets for list 0 (and list 1 for B slices).  References without a
 * weight flag get the default weight (1<<denominator) and zero offset;
 * use_weight/use_weight_chroma record whether any non-default value
 * was seen.
 * NOTE(review): some flag initialisations and the return statement are
 * on lines not visible in this view of the file.
 */
2938 static int pred_weight_table(H264Context *h){
2939 MpegEncContext * const s = &h->s;
2941 int luma_def, chroma_def;
2944 h->use_weight_chroma= 0;
2945 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2946 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2947 luma_def = 1<<h->luma_log2_weight_denom;
2948 chroma_def = 1<<h->chroma_log2_weight_denom;
2950 for(list=0; list<2; list++){
2951 for(i=0; i<h->ref_count[list]; i++){
2952 int luma_weight_flag, chroma_weight_flag;
2954 luma_weight_flag= get_bits1(&s->gb);
2955 if(luma_weight_flag){
2956 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2957 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2958 if( h->luma_weight[list][i] != luma_def
2959 || h->luma_offset[list][i] != 0)
2962 h->luma_weight[list][i]= luma_def;
2963 h->luma_offset[list][i]= 0;
2967 chroma_weight_flag= get_bits1(&s->gb);
2968 if(chroma_weight_flag){
2971 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2972 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2973 if( h->chroma_weight[list][i][j] != chroma_def
2974 || h->chroma_offset[list][i][j] != 0)
2975 h->use_weight_chroma= 1;
2980 h->chroma_weight[list][i][j]= chroma_def;
2981 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a second (list 1) weight table */
2986 if(h->slice_type_nos != FF_B_TYPE) break;
2988 h->use_weight= h->use_weight || h->use_weight_chroma;
/*
 * Derive implicit bipred weights (H.264 8.4.2.3.2): for each list0/list1
 * reference pair, weight by temporal distance of the current picture
 * between the two references, clamped to 32/32 for degenerate distances.
 * Skipped (weights disabled) when there is exactly one reference each
 * side placed symmetrically around the current POC.
 */
2992 static void implicit_weight_table(H264Context *h){
2993 MpegEncContext * const s = &h->s;
2995 int cur_poc = s->current_picture_ptr->poc;
2997 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2998 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3000 h->use_weight_chroma= 0;
/* 2 = "implicit" mode marker (vs 1 for explicit weights) */
3005 h->use_weight_chroma= 2;
3006 h->luma_log2_weight_denom= 5;
3007 h->chroma_log2_weight_denom= 5;
3009 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3010 int poc0 = h->ref_list[0][ref0].poc;
3011 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3012 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/DistScaleFactor as per spec equations 8-201..8-205 */
3013 int td = av_clip(poc1 - poc0, -128, 127);
3015 int tb = av_clip(cur_poc - poc0, -128, 127);
3016 int tx = (16384 + (FFABS(td) >> 1)) / td;
3017 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors fall back to equal weighting */
3018 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3019 h->implicit_weight[ref0][ref1] = 32;
3021 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3023 h->implicit_weight[ref0][ref1] = 32;
3029 * Mark a picture as no longer needed for reference. The refmask
3030 * argument allows unreferencing of individual fields or the whole frame.
3031 * If the picture becomes entirely unreferenced, but is being held for
3032 * display purposes, it is marked as such.
3033 * @param refmask mask of fields to unreference; the mask is bitwise
3034 * anded with the reference marking of pic
3035 * @return non-zero if pic becomes entirely unreferenced (except possibly
3036 * for display purposes) zero if one of the fields remains in
3039 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// clear the requested field bits; non-zero result means a field is still referenced
3041 if (pic->reference &= refmask) {
// fully unreferenced: if the pic is still queued for output,
// keep it alive by marking it DELAYED_PIC_REF
3044 for(i = 0; h->delayed_pic[i]; i++)
3045 if(pic == h->delayed_pic[i]){
3046 pic->reference=DELAYED_PIC_REF;
3054 * instantaneous decoder refresh.
3056 static void idr(H264Context *h){
// drop all long-term references (16 slots)
3059 for(i=0; i<16; i++){
3060 remove_long(h, i, 0);
3062 assert(h->long_ref_count==0);
// drop all short-term references; refmask 0 fully unreferences each pic
3064 for(i=0; i<h->short_ref_count; i++){
3065 unreference_pic(h, h->short_ref[i], 0);
3066 h->short_ref[i]= NULL;
3068 h->short_ref_count=0;
// reset frame_num tracking so post-IDR pictures start from a clean state
3069 h->prev_frame_num= 0;
3070 h->prev_frame_num_offset= 0;
3075 /* forget old pics after a seek */
3076 static void flush_dpb(AVCodecContext *avctx){
3077 H264Context *h= avctx->priv_data;
// drop all pictures queued for delayed output
3079 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3080 if(h->delayed_pic[i])
3081 h->delayed_pic[i]->reference= 0;
3082 h->delayed_pic[i]= NULL;
// reset output-POC tracking so reordering restarts from scratch
3084 h->outputed_poc= INT_MIN;
3086 if(h->s.current_picture_ptr)
3087 h->s.current_picture_ptr->reference= 0;
// forget any pending first field of an interlaced pair
3088 h->s.first_field= 0;
// let the generic MPEG layer flush its own picture state too
3089 ff_mpeg_flush(avctx);
3093 * Find a Picture in the short term reference list by frame number.
3094 * @param frame_num frame number to search for
3095 * @param idx the index into h->short_ref where returned picture is found
3096 * undefined if no picture found.
3097 * @return pointer to the found picture, or NULL if no pic with the provided
3098 * frame number is found
3100 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3101 MpegEncContext * const s = &h->s;
// linear scan of the short-term list; it is small (bounded by ref_frame_count)
3104 for(i=0; i<h->short_ref_count; i++){
3105 Picture *pic= h->short_ref[i];
3106 if(s->avctx->debug&FF_DEBUG_MMCO)
3107 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
// match found: (elided lines presumably store i into *idx and return pic)
3108 if(pic->frame_num == frame_num) {
3117 * Remove a picture from the short term reference list by its index in
3118 * that list. This does no checking on the provided index; it is assumed
3119 * to be valid. Other list entries are shifted down.
3120 * @param i index into h->short_ref of picture to remove.
3122 static void remove_short_at_index(H264Context *h, int i){
3123 assert(i >= 0 && i < h->short_ref_count);
3124 h->short_ref[i]= NULL;
// shift the remaining entries down; when the last entry was removed
// the count drops to 0 and no move is needed
3125 if (--h->short_ref_count)
3126 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3131 * @return the removed picture or NULL if an error occurs
3133 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3134 MpegEncContext * const s = &h->s;
3138 if(s->avctx->debug&FF_DEBUG_MMCO)
3139 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
// locate by frame_num; i receives its index in short_ref
3141 pic = find_short(h, frame_num, &i);
// only drop the list entry if the picture became entirely unreferenced
// (unreference_pic returns non-zero in that case)
3143 if(unreference_pic(h, pic, ref_mask))
3144 remove_short_at_index(h, i);
3151 * Remove a picture from the long term reference list by its index in
3153 * @return the removed picture or NULL if an error occurs
3155 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3158 pic= h->long_ref[i];
// unreference_pic returns non-zero when no field of pic remains referenced;
// only then is the long-term slot actually vacated
3160 if(unreference_pic(h, pic, ref_mask)){
3161 assert(h->long_ref[i]->long_ref == 1);
3162 h->long_ref[i]->long_ref= 0;
3163 h->long_ref[i]= NULL;
3164 h->long_ref_count--;
3172 * print short term list
3174 static void print_short_term(H264Context *h) {
// debug-only dump; gated on the MMCO debug flag so it costs nothing otherwise
3176 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3177 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3178 for(i=0; i<h->short_ref_count; i++){
3179 Picture *pic= h->short_ref[i];
3180 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3186 * print long term list
3188 static void print_long_term(H264Context *h) {
// debug-only dump of all 16 long-term slots (unlike short_ref, the
// long_ref array is sparse, so the full range is walked)
3190 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3191 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3192 for(i = 0; i < 16; i++){
3193 Picture *pic= h->long_ref[i];
3195 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3202 * Executes the reference picture marking (memory management control operations).
3204 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3205 MpegEncContext * const s = &h->s;
3207 int current_ref_assigned=0;
3210 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3211 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
// apply each memory-management control operation in bitstream order
3213 for(i=0; i<mmco_count; i++){
3214 int structure, frame_num;
3215 if(s->avctx->debug&FF_DEBUG_MMCO)
3216 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// both of these opcodes name a short-term picture: resolve it up front
3218 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3219 || mmco[i].opcode == MMCO_SHORT2LONG){
3220 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3221 pic = find_short(h, frame_num, &j);
// lookup failed: only an error unless SHORT2LONG already targeted a
// long slot holding this same frame_num (then it is a harmless repeat)
3223 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3224 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3225 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3230 switch(mmco[i].opcode){
3231 case MMCO_SHORT2UNUSED:
3232 if(s->avctx->debug&FF_DEBUG_MMCO)
3233 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME keeps the opposite field referenced
3234 remove_short(h, frame_num, structure ^ PICT_FRAME);
3236 case MMCO_SHORT2LONG:
// evict whatever currently occupies the target long-term slot
3237 if (h->long_ref[mmco[i].long_arg] != pic)
3238 remove_long(h, mmco[i].long_arg, 0);
3240 remove_short_at_index(h, j);
3241 h->long_ref[ mmco[i].long_arg ]= pic;
3242 if (h->long_ref[ mmco[i].long_arg ]){
3243 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3244 h->long_ref_count++;
3247 case MMCO_LONG2UNUSED:
3248 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3249 pic = h->long_ref[j];
3251 remove_long(h, j, structure ^ PICT_FRAME);
3252 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3253 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3256 // Comment below left from previous code as it is an interesting note.
3257 /* First field in pair is in short term list or
3258 * at a different long term index.
3259 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3260 * Report the problem and keep the pair where it is,
3261 * and mark this field valid.
3264 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3265 remove_long(h, mmco[i].long_arg, 0);
3267 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3268 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3269 h->long_ref_count++;
3272 s->current_picture_ptr->reference |= s->picture_structure;
3273 current_ref_assigned=1;
3275 case MMCO_SET_MAX_LONG:
3276 assert(mmco[i].long_arg <= 16);
// just remove the long term which index is greater than new max
3278 for(j = mmco[i].long_arg; j<16; j++){
3279 remove_long(h, j, 0);
// (MMCO_RESET path, per the opcode set) clear both reference lists
// and restart POC/frame_num numbering from zero
3283 while(h->short_ref_count){
3284 remove_short(h, h->short_ref[0]->frame_num, 0);
3286 for(j = 0; j < 16; j++) {
3287 remove_long(h, j, 0);
3289 s->current_picture_ptr->poc=
3290 s->current_picture_ptr->field_poc[0]=
3291 s->current_picture_ptr->field_poc[1]=
3295 s->current_picture_ptr->frame_num= 0;
// no MMCO assigned the current picture a reference slot: default
// sliding-window style insertion at the head of short_ref
3301 if (!current_ref_assigned) {
3302 /* Second field of complementary field pair; the first field of
3303 * which is already referenced. If short referenced, it
3304 * should be first entry in short_ref. If not, it must exist
3305 * in long_ref; trying to put it on the short list here is an
3306 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3308 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3309 /* Just mark the second field valid */
3310 s->current_picture_ptr->reference = PICT_FRAME;
3311 } else if (s->current_picture_ptr->long_ref) {
3312 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3313 "assignment for second field "
3314 "in complementary field pair "
3315 "(first field is long term)\n");
3317 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3319 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// prepend the current picture to the short-term list
3322 if(h->short_ref_count)
3323 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3325 h->short_ref[0]= s->current_picture_ptr;
3326 h->short_ref_count++;
3327 s->current_picture_ptr->reference |= s->picture_structure;
3331 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3333 /* We have too many reference frames, probably due to corrupted
3334 * stream. Need to discard one frame. Prevents overrun of the
3335 * short_ref and long_ref buffers.
3337 av_log(h->s.avctx, AV_LOG_ERROR,
3338 "number of reference frames exceeds max (probably "
3339 "corrupt input), discarding one\n");
// prefer discarding a long-term ref only when no short-term one exists
3341 if (h->long_ref_count && !h->short_ref_count) {
3342 for (i = 0; i < 16; ++i)
3347 remove_long(h, i, 0);
// otherwise drop the oldest short-term reference (last list entry)
3349 pic = h->short_ref[h->short_ref_count - 1];
3350 remove_short(h, pic->frame_num, 0);
3354 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[],
 * and synthesizes sliding-window MMCOs when no adaptive marking is present.
 * NOTE(review): error-return lines and some closing braces are elided here.
 */
3359 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3360 MpegEncContext * const s = &h->s;
// IDR slices carry no_output_of_prior_pics/long_term_reference flags instead
3364 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3365 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag set: mark the IDR pic long-term at index 0
3367 h->mmco[0].opcode= MMCO_LONG;
3368 h->mmco[0].long_arg= 0;
3372 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3373 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3374 MMCOOpcode opcode= get_ue_golomb(gb);
3376 h->mmco[i].opcode= opcode;
3377 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num (mod max_pic_num)
3378 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3379 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3380 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3384 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3385 unsigned int long_arg= get_ue_golomb(gb);
3386 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3390 h->mmco[i].long_arg= long_arg;
3393 if(opcode > (unsigned)MMCO_LONG){
3394 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3397 if(opcode == MMCO_END)
// no adaptive marking: emulate the sliding window by emitting a
// SHORT2UNUSED for the oldest short-term ref once the DPB is full
3402 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3404 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3405 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3406 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3407 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// in field coding, unreference both fields (pic nums 2n and 2n+1)
3409 if (FIELD_PICTURE) {
3410 h->mmco[0].short_pic_num *= 2;
3411 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3412 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types (0, 1, 2) and stores it into the current Picture's
 * poc/field_poc fields. NOTE(review): field_poc[] declaration, the
 * field_poc[0] assignment for type 0, and several else-branches are
 * elided in this view.
 */
3422 static int init_poc(H264Context *h){
3423 MpegEncContext * const s = &h->s;
3424 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3426 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture -> bump the offset
3428 h->frame_num_offset= h->prev_frame_num_offset;
3429 if(h->frame_num < h->prev_frame_num)
3430 h->frame_num_offset += max_frame_num;
3432 if(h->sps.poc_type==0){
3433 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// infer poc_msb wrap direction from the lsb discontinuity (spec 8.2.1.1)
3435 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3436 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3437 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3438 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3440 h->poc_msb = h->prev_poc_msb;
3441 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3443 field_poc[1] = h->poc_msb + h->poc_lsb;
3444 if(s->picture_structure == PICT_FRAME)
3445 field_poc[1] += h->delta_poc_bottom;
3446 }else if(h->sps.poc_type==1){
3447 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3450 if(h->sps.poc_cycle_length != 0)
3451 abs_frame_num = h->frame_num_offset + h->frame_num;
// non-reference pictures are counted between the cycle positions
3455 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3458 expected_delta_per_poc_cycle = 0;
3459 for(i=0; i < h->sps.poc_cycle_length; i++)
3460 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// expected POC = completed cycles * cycle delta + partial-cycle offsets
3462 if(abs_frame_num > 0){
3463 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3464 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3466 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3467 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3468 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3472 if(h->nal_ref_idc == 0)
3473 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3475 field_poc[0] = expectedpoc + h->delta_poc[0];
3476 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3478 if(s->picture_structure == PICT_FRAME)
3479 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC derived directly from frame_num
3481 int poc= 2*(h->frame_num_offset + h->frame_num);
// write back only the field(s) this picture actually covers
3490 if(s->picture_structure != PICT_BOTTOM_FIELD)
3491 s->current_picture_ptr->field_poc[0]= field_poc[0];
3492 if(s->picture_structure != PICT_TOP_FIELD)
3493 s->current_picture_ptr->field_poc[1]= field_poc[1];
3494 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3501 * initialize scan tables
3503 static void init_scan_tables(H264Context *h){
3504 MpegEncContext * const s = &h->s;
// 4x4 scans: if the C IDCT is in use, coefficient order matches the
// canonical tables; otherwise permute to the optimized IDCT's layout
3506 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3507 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3508 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3510 for(i=0; i<16; i++){
// T(): swap the 2-bit row/column halves of a 4x4 scan index
3511 #define T(x) (x>>2) | ((x<<2) & 0xF)
3512 h->zigzag_scan[i] = T(zigzag_scan[i]);
3513 h-> field_scan[i] = T( field_scan[i]);
// same scheme for the 8x8 scans
3517 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3518 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3519 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3520 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3521 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3523 for(i=0; i<64; i++){
// T(): swap the 3-bit row/column halves of an 8x8 scan index
3524 #define T(x) (x>>3) | ((x&7)<<3)
3525 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3526 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3527 h->field_scan8x8[i] = T(field_scan8x8[i]);
3528 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// qp==0 (lossless transform-bypass) blocks always use the unpermuted
// canonical scans, regardless of which IDCT is active
3532 if(h->sps.transform_bypass){ //FIXME same ugly
3533 h->zigzag_scan_q0 = zigzag_scan;
3534 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3535 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3536 h->field_scan_q0 = field_scan;
3537 h->field_scan8x8_q0 = field_scan8x8;
3538 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3540 h->zigzag_scan_q0 = h->zigzag_scan;
3541 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3542 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3543 h->field_scan_q0 = h->field_scan;
3544 h->field_scan8x8_q0 = h->field_scan8x8;
3545 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3550 * Replicates H264 "master" context to thread contexts.
3552 static void clone_slice(H264Context *dst, H264Context *src)
// shallow-copy per-frame decoding state from the master context so a
// worker thread decodes against the same picture and reference state
3554 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3555 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3556 dst->s.current_picture = src->s.current_picture;
3557 dst->s.linesize = src->s.linesize;
3558 dst->s.uvlinesize = src->s.uvlinesize;
3559 dst->s.first_field = src->s.first_field;
// POC / frame_num bookkeeping
3561 dst->prev_poc_msb = src->prev_poc_msb;
3562 dst->prev_poc_lsb = src->prev_poc_lsb;
3563 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3564 dst->prev_frame_num = src->prev_frame_num;
3565 dst->short_ref_count = src->short_ref_count;
// reference lists (arrays of Picture pointers / Pictures, copied wholesale)
3567 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3568 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3569 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3570 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
// dequant tables depend on the active PPS, shared across slices of a frame
3572 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3573 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3577 * decodes a slice header.
3578 * This will also call MPV_common_init() and frame_start() as needed.
3580 * @param h h264context
3581 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3583 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3585 static int decode_slice_header(H264Context *h, H264Context *h0){
3586 MpegEncContext * const s = &h->s;
3587 MpegEncContext * const s0 = &h0->s;
3588 unsigned int first_mb_in_slice;
3589 unsigned int pps_id;
3590 int num_ref_idx_active_override_flag;
3591 unsigned int slice_type, tmp, i, j;
3592 int default_ref_list_done = 0;
3593 int last_pic_structure;
// pictures with nal_ref_idc==0 are never used as references -> dropable
3595 s->dropable= h->nal_ref_idc == 0;
// FAST flag: use the cheaper 2-tap qpel filters for non-reference pics
3597 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3598 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3599 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3601 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3602 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3605 first_mb_in_slice= get_ue_golomb(&s->gb);
// CHUNKS mode: a slice starting at MB 0 begins a new picture
3607 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3608 h0->current_slice = 0;
3609 if (!s0->first_field)
3610 s->current_picture_ptr= NULL;
// --- slice_type parsing ---
3613 slice_type= get_ue_golomb(&s->gb);
3615 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// values >=5 mean "all slices of this picture have the same type"
3620 h->slice_type_fixed=1;
3622 h->slice_type_fixed=0;
3624 slice_type= golomb_to_pict_type[ slice_type ];
// the default ref list needs no rebuild for I slices or repeats of the
// previous slice type within the same picture
3625 if (slice_type == FF_I_TYPE
3626 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3627 default_ref_list_done = 1;
3629 h->slice_type= slice_type;
3630 h->slice_type_nos= slice_type & 3;
3632 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3633 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3634 av_log(h->s.avctx, AV_LOG_ERROR,
3635 "B picture before any references, skipping\n");
// --- PPS/SPS activation ---
3639 pps_id= get_ue_golomb(&s->gb);
3640 if(pps_id>=MAX_PPS_COUNT){
3641 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3644 if(!h0->pps_buffers[pps_id]) {
3645 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3648 h->pps= *h0->pps_buffers[pps_id];
3650 if(!h0->sps_buffers[h->pps.sps_id]) {
3651 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3654 h->sps = *h0->sps_buffers[h->pps.sps_id];
// master context rebuilds dequant tables when the active PPS changes
3656 if(h == h0 && h->dequant_coeff_pps != pps_id){
3657 h->dequant_coeff_pps = pps_id;
3658 init_dequant_tables(h);
// --- geometry from the SPS ---
3661 s->mb_width= h->sps.mb_width;
3662 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3664 h->b_stride= s->mb_width*4;
3665 h->b8_stride= s->mb_width*2;
// apply frame cropping (crop amounts are in units of 2 pixels)
3667 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3668 if(h->sps.frame_mbs_only_flag)
3669 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3671 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3673 if (s->context_initialized
3674 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3676 return -1; // width / height changed during parallelized decoding
3678 flush_dpb(s->avctx);
3681 if (!s->context_initialized) {
3683 return -1; // we cant (re-)initialize context during parallel decoding
3684 if (MPV_common_init(s) < 0)
3688 init_scan_tables(h);
// set up per-thread H264Contexts layered over the MpegEncContexts
3691 for(i = 1; i < s->avctx->thread_count; i++) {
3693 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3694 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3695 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3698 init_scan_tables(c);
3702 for(i = 0; i < s->avctx->thread_count; i++)
3703 if(context_init(h->thread_context[i]) < 0)
3706 s->avctx->width = s->width;
3707 s->avctx->height = s->height;
3708 s->avctx->sample_aspect_ratio= h->sps.sar;
3709 if(!s->avctx->sample_aspect_ratio.den)
3710 s->avctx->sample_aspect_ratio.den = 1;
3712 if(h->sps.timing_info_present_flag){
3713 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// work around a timebase bug in old x264 encodes
3714 if(h->x264_build > 0 && h->x264_build < 44)
3715 s->avctx->time_base.den *= 2;
3716 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3717 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3721 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// --- picture structure (frame / field / MBAFF) ---
3724 h->mb_aff_frame = 0;
3725 last_pic_structure = s0->picture_structure;
3726 if(h->sps.frame_mbs_only_flag){
3727 s->picture_structure= PICT_FRAME;
3729 if(get_bits1(&s->gb)) { //field_pic_flag
3730 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3732 s->picture_structure= PICT_FRAME;
3733 h->mb_aff_frame = h->sps.mb_aff;
3736 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
// first slice of a picture: frame_num-gap handling and field pairing
3738 if(h0->current_slice == 0){
// fill frame_num gaps with synthesized pictures so the short-term
// reference list stays consistent (spec 8.2.5.2)
3739 while(h->frame_num != h->prev_frame_num &&
3740 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3741 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3743 h->prev_frame_num++;
3744 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3745 s->current_picture_ptr->frame_num= h->prev_frame_num;
3746 execute_ref_pic_marking(h, NULL, 0);
3749 /* See if we have a decoded first field looking for a pair... */
3750 if (s0->first_field) {
3751 assert(s0->current_picture_ptr);
3752 assert(s0->current_picture_ptr->data[0]);
3753 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3755 /* figure out if we have a complementary field pair */
3756 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3758 * Previous field is unmatched. Don't display it, but let it
3759 * remain for reference if marked as such.
3761 s0->current_picture_ptr = NULL;
3762 s0->first_field = FIELD_PICTURE;
3765 if (h->nal_ref_idc &&
3766 s0->current_picture_ptr->reference &&
3767 s0->current_picture_ptr->frame_num != h->frame_num) {
3769 * This and previous field were reference, but had
3770 * different frame_nums. Consider this field first in
3771 * pair. Throw away previous field except for reference
3774 s0->first_field = 1;
3775 s0->current_picture_ptr = NULL;
3778 /* Second field in complementary pair */
3779 s0->first_field = 0;
3784 /* Frame or first field in a potentially complementary pair */
3785 assert(!s0->current_picture_ptr);
3786 s0->first_field = FIELD_PICTURE;
3789 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3790 s0->first_field = 0;
3797 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
// --- slice position ---
3799 assert(s->mb_num == s->mb_width * s->mb_height);
3800 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3801 first_mb_in_slice >= s->mb_num){
3802 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3805 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3806 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
// bottom field rows are interleaved one row below the top field's
3807 if (s->picture_structure == PICT_BOTTOM_FIELD)
3808 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3809 assert(s->mb_y < s->mb_height);
// pic num range doubles in field coding (fields numbered 2n / 2n+1)
3811 if(s->picture_structure==PICT_FRAME){
3812 h->curr_pic_num= h->frame_num;
3813 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3815 h->curr_pic_num= 2*h->frame_num + 1;
3816 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3819 if(h->nal_unit_type == NAL_IDR_SLICE){
3820 get_ue_golomb(&s->gb); /* idr_pic_id */
// --- POC-related slice-header syntax ---
3823 if(h->sps.poc_type==0){
3824 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3826 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3827 h->delta_poc_bottom= get_se_golomb(&s->gb);
3831 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3832 h->delta_poc[0]= get_se_golomb(&s->gb);
3834 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3835 h->delta_poc[1]= get_se_golomb(&s->gb);
3840 if(h->pps.redundant_pic_cnt_present){
3841 h->redundant_pic_count= get_ue_golomb(&s->gb);
3844 //set defaults, might be overridden a few lines later
3845 h->ref_count[0]= h->pps.ref_count[0];
3846 h->ref_count[1]= h->pps.ref_count[1];
// --- reference counts and reference list construction ---
3848 if(h->slice_type_nos != FF_I_TYPE){
3849 if(h->slice_type_nos == FF_B_TYPE){
3850 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3852 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3854 if(num_ref_idx_active_override_flag){
3855 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3856 if(h->slice_type_nos==FF_B_TYPE)
3857 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// unsigned trick: ref_count-1 > 31 also catches ref_count == 0
3859 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3860 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3861 h->ref_count[0]= h->ref_count[1]= 1;
3865 if(h->slice_type_nos == FF_B_TYPE)
3872 if(!default_ref_list_done){
3873 fill_default_ref_list(h);
3876 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// expose the primary references to the generic MPEG layer
3879 if(h->slice_type_nos!=FF_I_TYPE){
3880 s->last_picture_ptr= &h->ref_list[0][0];
3881 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3883 if(h->slice_type_nos==FF_B_TYPE){
3884 s->next_picture_ptr= &h->ref_list[1][0];
3885 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
// --- weighted prediction setup ---
3888 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3889 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3890 pred_weight_table(h);
3891 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3892 implicit_weight_table(h);
// reference-marking is parsed on the master context (h0)
3897 decode_ref_pic_marking(h0, &s->gb);
3900 fill_mbaff_ref_list(h);
3902 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3903 direct_dist_scale_factor(h);
3904 direct_ref_list_init(h);
// --- entropy / QP / deblocking parameters ---
3906 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3907 tmp = get_ue_golomb(&s->gb);
3909 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3912 h->cabac_init_idc= tmp;
3915 h->last_qscale_diff = 0;
3916 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3918 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3922 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3923 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3924 //FIXME qscale / qp ... stuff
// SP/SI syntax elements are parsed but unused (switching pics unsupported)
3925 if(h->slice_type == FF_SP_TYPE){
3926 get_bits1(&s->gb); /* sp_for_switch_flag */
3928 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3929 get_se_golomb(&s->gb); /* slice_qs_delta */
3932 h->deblocking_filter = 1;
3933 h->slice_alpha_c0_offset = 0;
3934 h->slice_beta_offset = 0;
3935 if( h->pps.deblocking_filter_parameters_present ) {
3936 tmp= get_ue_golomb(&s->gb);
3938 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// bitstream idc: 0=on, 1=off, 2=on-except-slice-edges; internally
// 0<->1 are swapped so deblocking_filter is a boolean-ish "enabled"
3941 h->deblocking_filter= tmp;
3942 if(h->deblocking_filter < 2)
3943 h->deblocking_filter^= 1; // 1<->0
3945 if( h->deblocking_filter ) {
3946 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3947 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// honor the user's skip_loop_filter discard level
3951 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3952 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3953 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3954 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3955 h->deblocking_filter= 0;
// cross-slice deblocking cannot run with sliced multithreading
3957 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3958 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3959 /* Cheat slightly for speed:
3960 Do not bother to deblock across slices. */
3961 h->deblocking_filter = 2;
3963 h0->max_contexts = 1;
3964 if(!h0->single_decode_warning) {
3965 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3966 h0->single_decode_warning = 1;
3969 return 1; // deblocking switched inside frame
3974 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3975 slice_group_change_cycle= get_bits(&s->gb, ?);
// --- finalize slice bookkeeping ---
3978 h0->last_slice_type = slice_type;
3979 h->slice_num = ++h0->current_slice;
3980 if(h->slice_num >= MAX_SLICES){
3981 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// precompute the ref-index -> frame identifier map used by deblocking
3985 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3989 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3990 +(h->ref_list[j][i].reference&3);
3993 for(i=16; i<48; i++)
3994 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3995 +(h->ref_list[j][i].reference&3);
3998 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3999 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4001 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4002 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4004 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4006 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4007 pps_id, h->frame_num,
4008 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4009 h->ref_count[0], h->ref_count[1],
4011 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4013 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4014 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before
 * the first 1 in the bitstream (unary code), using the raw bitstream
 * reader macros for speed.
 */
4024 static inline int get_level_prefix(GetBitContext *gb){
4028 OPEN_READER(re, gb);
4029 UPDATE_CACHE(re, gb);
4030 buf=GET_CACHE(re, gb);
// position of the first set bit from the MSB; log-1 is the prefix value
4032 log= 32 - av_log2(buf);
4034 print_bin(buf>>(32-log), log);
4035 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// consume the zeros and the terminating 1 bit
4038 LAST_SKIP_BITS(re, gb, log);
4039 CLOSE_READER(re, gb);
/**
 * Returns non-zero if the current macroblock may use the 8x8 transform:
 * no sub-8x8 partitions present in any of the four sub_mb_types (checked
 * in parallel via a 64-bit mask over the packed sub_mb_type array).
 * With direct_8x8_inference_flag, DIRECT sub-blocks are also acceptable.
 */
4044 static inline int get_dct8x8_allowed(H264Context *h){
4045 if(h->sps.direct_8x8_inference_flag)
4046 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4048 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4052 * decodes a residual block.
4053 * @param n block index
4054 * @param scantable scantable
4055 * @param max_coeff number of coefficients in the block
4056 * @return <0 if an error occurred
/* CAVLC residual decoding (H.264 spec 9.2). NOTE(review): this listing is
 * elided — several interior lines (else branches, error returns, closing
 * braces) are missing from this excerpt; comments below describe only what
 * the visible lines establish. */
4058 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4059 MpegEncContext * const s = &h->s;
/* maps the predicted non-zero count (0..16) to one of 4 coeff_token VLC tables */
4060 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4062 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4064 //FIXME put trailing_onex into the context
/* --- coeff_token: packs total_coeff (bits 2..6) and trailing_ones (bits 0..1) --- */
4066 if(n == CHROMA_DC_BLOCK_INDEX){
4067 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4068 total_coeff= coeff_token>>2;
4070 if(n == LUMA_DC_BLOCK_INDEX){
/* VLC table choice is context-adaptive: based on neighbours' non-zero counts */
4071 total_coeff= pred_non_zero_count(h, 0);
4072 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4073 total_coeff= coeff_token>>2;
4075 total_coeff= pred_non_zero_count(h, n);
4076 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4077 total_coeff= coeff_token>>2;
4078 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4082 //FIXME set last_non_zero?
/* sanity check against corrupted bitstreams */
4086 if(total_coeff > (unsigned)max_coeff) {
4087 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4091 trailing_ones= coeff_token&3;
4092 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4093 assert(total_coeff<=16);
/* --- trailing ones: up to 3 +/-1 coefficients, one sign bit each.
 * Peek 3 bits and precompute all three candidate signs branch-free;
 * only 'trailing_ones' of the peeked bits are actually consumed. --- */
4095 i = show_bits(gb, 3);
4096 skip_bits(gb, trailing_ones);
4097 level[0] = 1-((i&4)>>1);
4098 level[1] = 1-((i&2) );
4099 level[2] = 1-((i&1)<<1);
/* --- remaining levels: level_prefix + suffix coding --- */
4101 if(trailing_ones<total_coeff) {
4102 int level_code, mask;
/* first suffix_length is 1 only for blocks with many coeffs and few T1s */
4103 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4104 int prefix= get_level_prefix(gb);
4106 //first coefficient has suffix_length equal to 0 or 1
4107 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4109 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4111 level_code= (prefix<<suffix_length); //part
4112 }else if(prefix==14){
4114 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4116 level_code= prefix + get_bits(gb, 4); //part
/* prefix>=15: escape code with a (prefix-3)-bit suffix */
4118 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4119 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4121 level_code += (1<<(prefix-3))-4096;
/* if fewer than 3 trailing ones, the first level cannot be +/-1: shift codes */
4124 if(trailing_ones < 3) level_code += 2;
/* zig-zag mapping of level_code to signed level: even->positive, odd->negative */
4129 mask= -(level_code&1);
4130 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4132 //remaining coefficients have suffix_length > 0
4133 for(i=trailing_ones+1;i<total_coeff;i++) {
/* suffix_length grows once the decoded magnitude exceeds these thresholds */
4134 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4135 prefix = get_level_prefix(gb);
4137 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4139 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4141 level_code += (1<<(prefix-3))-4096;
4143 mask= -(level_code&1);
4144 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4145 if(level_code > suffix_limit[suffix_length])
/* --- total_zeros: zeros interleaved among the coefficients --- */
4150 if(total_coeff == max_coeff)
4153 if(n == CHROMA_DC_BLOCK_INDEX)
4154 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4156 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- write levels into 'block' at scan positions, highest frequency first.
 * Two parallel loops: this one stores raw levels (no dequantization)... --- */
4159 coeff_num = zeros_left + total_coeff - 1;
4160 j = scantable[coeff_num];
4162 block[j] = level[0];
4163 for(i=1;i<total_coeff;i++) {
4166 else if(zeros_left < 7){
4167 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4169 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4171 zeros_left -= run_before;
4172 coeff_num -= 1 + run_before;
4173 j= scantable[ coeff_num ];
/* ...and this one applies the dequant table qmul with rounding (>>6).
 * NOTE(review): the branch selecting between the two loops is elided here. */
4178 block[j] = (level[0] * qmul[j] + 32)>>6;
4179 for(i=1;i<total_coeff;i++) {
4182 else if(zeros_left < 7){
4183 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4185 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4187 zeros_left -= run_before;
4188 coeff_num -= 1 + run_before;
4189 j= scantable[ coeff_num ];
4191 block[j]= (level[i] * qmul[j] + 32)>>6;
/* a negative zeros_left indicates an invalid run_before sequence */
4196 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4203 static void predict_field_decoding_flag(H264Context *h){
4204 MpegEncContext * const s = &h->s;
4205 const int mb_xy= h->mb_xy;
/* For a skipped MBAFF pair the field/frame flag is not coded; predict it
 * from the left neighbour if it belongs to the same slice, otherwise from
 * the top neighbour. NOTE(review): the final ": 0" arm of this conditional
 * (neither neighbour available) is elided from this excerpt. */
4206 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4207 ? s->current_picture.mb_type[mb_xy-1]
4208 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4209 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4211 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4215 * decodes a P_SKIP or B_SKIP macroblock
/* Skipped MBs carry no residual and derive their motion from neighbours:
 * B_SKIP uses direct prediction, P_SKIP uses the pskip MV predictor.
 * NOTE(review): some interior lines (mb_type init, branch structure) are
 * elided from this excerpt. */
4217 static void decode_mb_skip(H264Context *h){
4218 MpegEncContext * const s = &h->s;
4219 const int mb_xy= h->mb_xy;
/* no residual: clear all non-zero-count state for this MB */
4222 memset(h->non_zero_count[mb_xy], 0, 16);
4223 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4226 mb_type|= MB_TYPE_INTERLACED;
4228 if( h->slice_type_nos == FF_B_TYPE )
4230 // just for fill_caches. pred_direct_motion will set the real mb_type
4231 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4233 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4234 pred_direct_motion(h, &mb_type);
4235 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: single 16x16 list-0 partition with the predicted MV */
4240 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4242 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4243 pred_pskip_motion(h, &mx, &my);
4244 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4245 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-MB state to the frame-wide tables */
4248 write_back_motion(h, mb_type);
4249 s->current_picture.mb_type[mb_xy]= mb_type;
4250 s->current_picture.qscale_table[mb_xy]= s->qscale;
4251 h->slice_table[ mb_xy ]= h->slice_num;
4252 h->prev_mb_skipped= 1;
4256 * decodes a macroblock
4257 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CAVLC macroblock decode: skip-run handling, mb_type, prediction
 * info (intra modes or reference indices + motion vectors), CBP, dquant
 * and residual blocks. NOTE(review): this listing is heavily elided —
 * many interior lines (else branches, closing braces, error returns) are
 * missing from this excerpt; comments describe only the visible lines. */
4259 static int decode_mb_cavlc(H264Context *h){
4260 MpegEncContext * const s = &h->s;
4262 int partition_count;
4263 unsigned int mb_type, cbp;
4264 int dct8x8_allowed= h->pps.transform_8x8_mode;
4266 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4268 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4269 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run: number of consecutive skipped MBs (P/B slices only) --- */
4271 if(h->slice_type_nos != FF_I_TYPE){
4272 if(s->mb_skip_run==-1)
4273 s->mb_skip_run= get_ue_golomb(&s->gb);
4275 if (s->mb_skip_run--) {
4276 if(FRAME_MBAFF && (s->mb_y&1) == 0){
/* the field flag is only coded on the last skip of a top-field MB pair */
4277 if(s->mb_skip_run==0)
4278 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4280 predict_field_decoding_flag(h);
4287 if( (s->mb_y&1) == 0 )
4288 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4291 h->prev_mb_skipped= 0;
/* --- mb_type: per-slice-type lookup tables map the ue(v) code to flags --- */
4293 mb_type= get_ue_golomb(&s->gb);
4294 if(h->slice_type_nos == FF_B_TYPE){
4296 partition_count= b_mb_type_info[mb_type].partition_count;
4297 mb_type= b_mb_type_info[mb_type].type;
/* out-of-range B/P types fall through to the intra table (offset applied) */
4300 goto decode_intra_mb;
4302 }else if(h->slice_type_nos == FF_P_TYPE){
4304 partition_count= p_mb_type_info[mb_type].partition_count;
4305 mb_type= p_mb_type_info[mb_type].type;
4308 goto decode_intra_mb;
4311 assert(h->slice_type_nos == FF_I_TYPE);
4312 if(h->slice_type == FF_SI_TYPE && mb_type)
4316 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4320 cbp= i_mb_type_info[mb_type].cbp;
4321 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4322 mb_type= i_mb_type_info[mb_type].type;
4326 mb_type |= MB_TYPE_INTERLACED;
4328 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw, byte-aligned samples; no prediction or transform --- */
4330 if(IS_INTRA_PCM(mb_type)){
4333 // We assume these blocks are very rare so we do not optimize it.
4334 align_get_bits(&s->gb);
4336 // The pixels are stored in the same order as levels in h->mb array.
4337 for(x=0; x < (CHROMA ? 384 : 256); x++){
4338 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4341 // In deblocking, the quantizer is 0
4342 s->current_picture.qscale_table[mb_xy]= 0;
4343 // All coeffs are present
4344 memset(h->non_zero_count[mb_xy], 16, 16);
4346 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF: ref counts are doubled while decoding a field MB of a pair */
4351 h->ref_count[0] <<= 1;
4352 h->ref_count[1] <<= 1;
4355 fill_caches(h, mb_type, 0);
/* --- intra prediction mode parsing --- */
4358 if(IS_INTRA(mb_type)){
4360 // init_top_left_availability(h);
4361 if(IS_INTRA4x4(mb_type)){
4364 if(dct8x8_allowed && get_bits1(&s->gb)){
4365 mb_type |= MB_TYPE_8x8DCT;
4369 // fill_intra4x4_pred_table(h);
4370 for(i=0; i<16; i+=di){
4371 int mode= pred_intra_mode(h, i);
/* one flag bit selects the predicted mode; else 3 bits code the remainder */
4373 if(!get_bits1(&s->gb)){
4374 const int rem_mode= get_bits(&s->gb, 3);
4375 mode = rem_mode + (rem_mode >= mode);
4379 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4381 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4383 write_back_intra_pred_mode(h);
4384 if( check_intra4x4_pred_mode(h) < 0)
4387 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4388 if(h->intra16x16_pred_mode < 0)
4392 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4395 h->chroma_pred_mode= pred_mode;
/* --- inter, 4 partitions (8x8 sub-macroblocks) --- */
4397 }else if(partition_count==4){
4398 int i, j, sub_partition_count[4], list, ref[2][4];
4400 if(h->slice_type_nos == FF_B_TYPE){
4402 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4403 if(h->sub_mb_type[i] >=13){
4404 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4407 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4408 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4410 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4411 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4412 pred_direct_motion(h, &mb_type);
/* mark interior positions unavailable so MV prediction ignores them */
4413 h->ref_cache[0][scan8[4]] =
4414 h->ref_cache[1][scan8[4]] =
4415 h->ref_cache[0][scan8[12]] =
4416 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4419 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4421 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4422 if(h->sub_mb_type[i] >=4){
4423 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4426 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4427 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per 8x8 block, per list (direct blocks skipped) */
4431 for(list=0; list<h->list_count; list++){
4432 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4434 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4435 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4436 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4438 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4450 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector deltas per sub-partition, applied on top of pred_motion */
4452 for(list=0; list<h->list_count; list++){
4454 if(IS_DIRECT(h->sub_mb_type[i])) {
4455 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4458 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4459 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4461 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4462 const int sub_mb_type= h->sub_mb_type[i];
4463 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4464 for(j=0; j<sub_partition_count[i]; j++){
4466 const int index= 4*i + block_width*j;
4467 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4468 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4469 mx += get_se_golomb(&s->gb);
4470 my += get_se_golomb(&s->gb);
4471 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into every 4x4 cell the sub-partition covers */
4473 if(IS_SUB_8X8(sub_mb_type)){
4475 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4477 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4478 }else if(IS_SUB_8X4(sub_mb_type)){
4479 mv_cache[ 1 ][0]= mx;
4480 mv_cache[ 1 ][1]= my;
4481 }else if(IS_SUB_4X8(sub_mb_type)){
4482 mv_cache[ 8 ][0]= mx;
4483 mv_cache[ 8 ][1]= my;
4485 mv_cache[ 0 ][0]= mx;
4486 mv_cache[ 0 ][1]= my;
4489 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- B direct 16x16 --- */
4495 }else if(IS_DIRECT(mb_type)){
4496 pred_direct_motion(h, &mb_type);
4497 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4499 int list, mx, my, i;
4500 //FIXME we should set ref_idx_l? to 0 if we use that later ...
/* --- inter 16x16: one ref + one MV per used list --- */
4501 if(IS_16X16(mb_type)){
4502 for(list=0; list<h->list_count; list++){
4504 if(IS_DIR(mb_type, 0, list)){
4505 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4506 if(val >= h->ref_count[list]){
4507 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4511 val= LIST_NOT_USED&0xFF;
4512 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4514 for(list=0; list<h->list_count; list++){
4516 if(IS_DIR(mb_type, 0, list)){
4517 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4518 mx += get_se_golomb(&s->gb);
4519 my += get_se_golomb(&s->gb);
4520 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4522 val= pack16to32(mx,my);
4525 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
/* --- inter 16x8: two horizontal halves --- */
4528 else if(IS_16X8(mb_type)){
4529 for(list=0; list<h->list_count; list++){
4532 if(IS_DIR(mb_type, i, list)){
4533 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4534 if(val >= h->ref_count[list]){
4535 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4539 val= LIST_NOT_USED&0xFF;
4540 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4543 for(list=0; list<h->list_count; list++){
4546 if(IS_DIR(mb_type, i, list)){
4547 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4548 mx += get_se_golomb(&s->gb);
4549 my += get_se_golomb(&s->gb);
4550 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4552 val= pack16to32(mx,my);
4555 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
/* --- inter 8x16: two vertical halves --- */
4559 assert(IS_8X16(mb_type));
4560 for(list=0; list<h->list_count; list++){
4563 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4564 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4565 if(val >= h->ref_count[list]){
4566 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4570 val= LIST_NOT_USED&0xFF;
4571 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4574 for(list=0; list<h->list_count; list++){
4577 if(IS_DIR(mb_type, i, list)){
4578 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4579 mx += get_se_golomb(&s->gb);
4580 my += get_se_golomb(&s->gb);
4581 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4583 val= pack16to32(mx,my);
4586 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4592 if(IS_INTER(mb_type))
4593 write_back_motion(h, mb_type);
/* --- coded block pattern (intra16x16 gets cbp from the mb_type table) --- */
4595 if(!IS_INTRA16x16(mb_type)){
4596 cbp= get_ue_golomb(&s->gb);
4598 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4603 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4604 else cbp= golomb_to_inter_cbp [cbp];
/* gray (luma-only) variant tables; the selecting branch is elided here */
4606 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4607 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag: only coded for inter MBs with luma residual */
4612 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4613 if(get_bits1(&s->gb)){
4614 mb_type |= MB_TYPE_8x8DCT;
4615 h->cbp_table[mb_xy]= cbp;
4618 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
4620 if(cbp || IS_INTRA16x16(mb_type)){
4621 int i8x8, i4x4, chroma_idx;
4623 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4624 const uint8_t *scan, *scan8x8, *dc_scan;
4626 // fill_non_zero_count_cache(h);
/* scan order depends on frame/field coding; *_q0 variants for qscale==0 */
4628 if(IS_INTERLACED(mb_type)){
4629 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4630 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4631 dc_scan= luma_dc_field_scan;
4633 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4634 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4635 dc_scan= luma_dc_zigzag_scan;
4638 dquant= get_se_golomb(&s->gb);
4640 if( dquant > 25 || dquant < -26 ){
4641 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta arithmetic */
4645 s->qscale += dquant;
4646 if(((unsigned)s->qscale) > 51){
4647 if(s->qscale<0) s->qscale+= 52;
4648 else s->qscale-= 52;
4651 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4652 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
/* intra16x16: separate luma DC block, then 15-coeff AC blocks */
4653 if(IS_INTRA16x16(mb_type)){
4654 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4655 return -1; //FIXME continue if partitioned and other return -1 too
4658 assert((cbp&15) == 0 || (cbp&15) == 15);
4661 for(i8x8=0; i8x8<4; i8x8++){
4662 for(i4x4=0; i4x4<4; i4x4++){
4663 const int index= i4x4 + 4*i8x8;
4664 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4670 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* non-intra16x16 luma: per-8x8 per cbp bit, 4x4 or 8x8 transform */
4673 for(i8x8=0; i8x8<4; i8x8++){
4674 if(cbp & (1<<i8x8)){
4675 if(IS_8x8DCT(mb_type)){
4676 DCTELEM *buf = &h->mb[64*i8x8];
4678 for(i4x4=0; i4x4<4; i4x4++){
/* 8x8 coeffs are coded as 4 interleaved 4x4 scans (scan8x8+16*i4x4) */
4679 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4680 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4683 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4684 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4686 for(i4x4=0; i4x4<4; i4x4++){
4687 const int index= i4x4 + 4*i8x8;
4689 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4695 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4696 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks (no dequant here) then, if coded, the AC blocks */
4702 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4703 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4709 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4710 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4711 for(i4x4=0; i4x4<4; i4x4++){
4712 const int index= 16 + 4*chroma_idx + i4x4;
4713 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4719 uint8_t * const nnz= &h->non_zero_count_cache[0];
4720 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4721 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not intra16x16: clear all non-zero counts */
4724 uint8_t * const nnz= &h->non_zero_count_cache[0];
4725 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4726 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4727 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4729 s->current_picture.qscale_table[mb_xy]= s->qscale;
4730 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling done above */
4733 h->ref_count[0] >>= 1;
4734 h->ref_count[1] >>= 1;
/* CABAC mb_field_decoding_flag: ctx (0..2) counts how many of the left and
 * top neighbouring MB pairs (same slice) are field-coded. */
4740 static int decode_cabac_field_decoding_flag(H264Context *h) {
4741 MpegEncContext * const s = &h->s;
4742 const int mb_x = s->mb_x;
/* address the top MB of the current MBAFF pair */
4743 const int mb_y = s->mb_y & ~1;
4744 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4745 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4747 unsigned int ctx = 0;
4749 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4752 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4756 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decode a CABAC intra mb_type (0 = I_4x4, 1..24 = I_16x16 variants,
 * 25 = I_PCM). ctx_base selects the context offset; intra_slice chooses
 * between I-slice contexts (neighbour-dependent) and inter-slice ones. */
4759 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4760 uint8_t *state= &h->cabac_state[ctx_base];
4764 MpegEncContext * const s = &h->s;
4765 const int mba_xy = h->left_mb_xy[0];
4766 const int mbb_xy = h->top_mb_xy;
/* I-slice: context 0..2 from non-I4x4 neighbours in the same slice */
4768 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4770 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4772 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4773 return 0;   /* I4x4 */
4776 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4777 return 0;   /* I4x4 */
/* terminate bin distinguishes I_PCM from I_16x16 */
4780 if( get_cabac_terminate( &h->cabac ) )
4781 return 25;  /* PCM */
/* I_16x16: compose the type index from cbp_luma, cbp_chroma and pred mode */
4783 mb_type = 1; /* I16x16 */
4784 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4785 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4786 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4787 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4788 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decode CABAC mb_type for the current slice type. P types use contexts
 * 14..17, B types 27..32; intra types are delegated with the matching
 * context base and offset into the unified mb_type numbering. */
4792 static int decode_cabac_mb_type( H264Context *h ) {
4793 MpegEncContext * const s = &h->s;
4795 if( h->slice_type_nos == FF_I_TYPE ) {
4796 return decode_cabac_intra_mb_type(h, 3, 1);
4797 } else if( h->slice_type_nos == FF_P_TYPE ) {
4798 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4800 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4801 /* P_L0_D16x16, P_8x8 */
4802 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4804 /* P_L0_D8x16, P_L0_D16x8 */
4805 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra-in-P types follow the 5 inter P types */
4808 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4811 const int mba_xy = h->left_mb_xy[0];
4812 const int mbb_xy = h->top_mb_xy;
4815 assert(h->slice_type_nos == FF_B_TYPE);
/* B slice: context 0..2 counts non-direct neighbours in the same slice */
4817 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4819 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4822 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4823 return 0; /* B_Direct_16x16 */
4825 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4826 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining bi-/mixed-prediction types */
4829 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4830 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4831 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4832 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4834 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4835 else if( bits == 13 ) {
/* intra-in-B types follow the 23 inter B types */
4836 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4837 } else if( bits == 14 )
4838 return 11; /* B_L1_L0_8x16 */
4839 else if( bits == 15 )
4840 return 22; /* B_8x8 */
4842 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4843 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decode the CABAC mb_skip_flag. ctx (0..2) counts non-skipped left/top
 * neighbours in the same slice; B slices use contexts 24+ctx via the
 * +13 offset (elided below), P slices 11+ctx. */
4847 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4848 MpegEncContext * const s = &h->s;
4852 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4853 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
/* pick the neighbour MB of the pair with the matching field/frame parity */
4856 && h->slice_table[mba_xy] == h->slice_num
4857 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4858 mba_xy += s->mb_stride;
4860 mbb_xy = mb_xy - s->mb_stride;
4862 && h->slice_table[mbb_xy] == h->slice_num
4863 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4864 mbb_xy -= s->mb_stride;
4866 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4868 int mb_xy = h->mb_xy;
/* field pictures: the MB above is a full stride pair away */
4870 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4873 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4875 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4878 if( h->slice_type_nos == FF_B_TYPE )
4880 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC intra 4x4 prediction mode: one flag bin selects the predicted
 * mode; otherwise a 3-bit fixed-length rem_intra4x4_pred_mode follows,
 * skipping over the predicted mode value. */
4883 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4886 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* 3 bins, LSB first, all sharing context 69 */
4889 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4890 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4891 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* remap so the predicted mode itself is never produced by the 3-bit code */
4893 if( mode >= pred_mode )
/* CABAC intra_chroma_pred_mode: truncated unary, max 3, with the first bin
 * context-selected (0..2) by neighbours using a non-zero chroma mode. */
4899 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4900 const int mba_xy = h->left_mb_xy[0];
4901 const int mbb_xy = h->top_mb_xy;
4905 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4906 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4909 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4912 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
/* remaining bins share context 64+3 */
4915 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4917 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC coded_block_pattern, luma part: 4 bins, one per 8x8 block in
 * raster order. Each bin's context depends on whether the 8x8 blocks to
 * its left and above (possibly in the neighbour MB cbp) were coded;
 * -1 (no neighbour) makes the !(...) tests evaluate as "coded". */
4923 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4924 int cbp_b, cbp_a, ctx, cbp = 0;
4926 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4927 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4929 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4930 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4931 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4932 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4933 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4934 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4935 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4936 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC coded_block_pattern, chroma part: returns 0 (none), 1 (DC only)
 * or 2 (DC+AC). Contexts derive from the neighbours' chroma cbp bits
 * (cbp bits 4-5 as stored in left_cbp/top_cbp). */
4939 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4943 cbp_a = (h->left_cbp>>4)&0x03;
4944 cbp_b = (h-> top_cbp>>4)&0x03;
/* first bin: any chroma coefficients at all? */
4947 if( cbp_a > 0 ) ctx++;
4948 if( cbp_b > 0 ) ctx += 2;
4949 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin: AC coefficients too? contexts keyed on neighbours == 2 */
4953 if( cbp_a == 2 ) ctx++;
4954 if( cbp_b == 2 ) ctx += 2;
4955 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC mb_qp_delta: unary-coded magnitude (contexts 60..63), then mapped
 * back to a signed delta — odd counts positive, even counts negative. */
4957 static int decode_cabac_mb_dqp( H264Context *h) {
/* first-bin context depends on whether the previous MB had a nonzero delta */
4958 int ctx= h->last_qscale_diff != 0;
4961 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4964 if(val > 102) //prevent infinite loop
4969 return (val + 1)>>1 ;
4971 return -((val + 1)>>1);
/* CABAC P sub_mb_type: small binary tree over contexts 21..23 selecting
 * among P_L0_8x8 / 8x4 / 4x8 / 4x4 (elided returns give the leaf values). */
4973 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4974 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4976 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4978 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC B sub_mb_type: binary tree over contexts 36..39 returning the
 * b_sub_mb_type table index (0 = B_Direct_8x8 .. 12). */
4982 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4984 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4985 return 0;   /* B_Direct_8x8 */
4986 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4987 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4989 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4990 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4991 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* remaining types composed from two more bins on context 39 */
4994 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4995 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag; context 399..401 selected by how many
 * neighbouring MBs use the 8x8 transform (neighbor_transform_size). */
4999 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5000 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC ref_idx: unary code over contexts 54+ctx, where the initial ctx
 * is derived from the left/top cached ref indices (derivation partly
 * elided from this excerpt). Capped at 32 to avoid runaway loops. */
5003 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5004 int refa = h->ref_cache[list][scan8[n] - 1];
5005 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices, direct-predicted neighbours do not count toward the context */
5009 if( h->slice_type_nos == FF_B_TYPE) {
5010 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5012 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5021 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5027 if(ref >= 32 /*h->ref_list[list]*/){
/* CABAC motion vector difference, one component (l: 0=x, 1=y).
 * UEG3 binarization: up to 9 unary bins with adaptive contexts, then a
 * 3rd-order Exp-Golomb suffix in bypass mode, then a bypass sign bit. */
5034 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* context from the summed magnitudes of the neighbouring cached mvds */
5035 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5036 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5037 int ctxbase = (l == 0) ? 40 : 47;
5039 int ctx = (amvd>2) + (amvd>32);
5041 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5046 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* Exp-Golomb escape: read the exponent in bypass bins */
5054 while( get_cabac_bypass( &h->cabac ) ) {
5058 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5063 if( get_cabac_bypass( &h->cabac ) )
/* sign bit: returns mvd or -mvd */
5067 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context index for the CABAC coded_block_flag of block category 'cat':
 * combines the left/above coded-ness (nza/nzb) into ctx 0..3, offset by
 * 4*cat. DC categories read neighbour MB cbp_table flags; AC categories
 * read the non_zero_count cache. (Parts of the derivation are elided.) */
5070 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* luma DC (cat 0): flag is bit 8 of the stored cbp */
5076 nza = h->left_cbp&0x100;
5077 nzb = h-> top_cbp&0x100;
/* chroma DC (cat 3): per-component flag at bits 6..7 */
5079 nza = (h->left_cbp>>(6+idx))&0x01;
5080 nzb = (h-> top_cbp>>(6+idx))&0x01;
5083 assert(cat == 1 || cat == 2 || cat == 4);
5084 nza = h->non_zero_count_cache[scan8[idx] - 1];
5085 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5094 return ctx + 4 * cat;
/* Context offsets for last_significant_coeff_flag in 8x8 blocks: one entry
 * per scan position 0..62 (position 63 needs no flag). ASM-aligned because
 * the x86 significance-decoding assembly indexes it directly. */
5097 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5098 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5099 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5100 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5101 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual block decoding (H.264 spec 9.3.2.3 / 9.3.3.1.1.9):
 * coded_block_flag, significance map, then coefficient levels in reverse
 * scan order. 'is_dc' is a compile-time template parameter (the _dc /
 * _nondc wrappers below) letting the compiler drop dead branches.
 * NOTE(review): several interior lines (declarations, else branches,
 * #else/#endif) are elided from this excerpt. */
5104 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* per-category context base offsets; row [1] is the field-coded variant */
5105 static const int significant_coeff_flag_offset[2][6] = {
5106 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5107 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5109 static const int last_coeff_flag_offset[2][6] = {
5110 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5111 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5113 static const int coeff_abs_level_m1_offset[6] = {
5114 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* 8x8 blocks share significance contexts between scan positions */
5116 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5117 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5118 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5119 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5120 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5121 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5122 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5123 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5124 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5126 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5127 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5128 * map node ctx => cabac ctx for level=1 */
5129 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5130 /* map node ctx => cabac ctx for level>1 */
5131 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5132 static const uint8_t coeff_abs_level_transition[2][8] = {
5133 /* update node ctx after decoding a level=1 */
5134 { 1, 2, 3, 3, 4, 5, 6, 7 },
5135 /* update node ctx after decoding a level>1 */
5136 { 4, 4, 4, 4, 5, 6, 7, 7 }
5142 int coeff_count = 0;
5145 uint8_t *significant_coeff_ctx_base;
5146 uint8_t *last_coeff_ctx_base;
5147 uint8_t *abs_level_m1_ctx_base;
/* copy the CABAC state into a stack-local 'cc' so the hot loops can keep
 * it in registers; written back before every return */
5150 #define CABAC_ON_STACK
5152 #ifdef CABAC_ON_STACK
5155 cc.range     = h->cabac.range;
5156 cc.low       = h->cabac.low;
5157 cc.bytestream= h->cabac.bytestream;
5159 #define CC &h->cabac
5163 /* cat: 0-> DC 16x16  n = 0
5164 * 1-> AC 16x16  n = luma4x4idx
5165 * 2-> Luma4x4   n = luma4x4idx
5166 * 3-> DC Chroma n = iCbCr
5167 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5168 * 5-> Luma8x8   n = 4 * luma8x8idx
5171 /* read coded block flag */
5172 if( is_dc || cat != 5 ) {
5173 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coefficients: clear the nnz cache entry and bail out early */
5175 h->non_zero_count_cache[scan8[n]] = 0;
5177 #ifdef CABAC_ON_STACK
5178 h->cabac.range     = cc.range     ;
5179 h->cabac.low       = cc.low       ;
5180 h->cabac.bytestream= cc.bytestream;
5186 significant_coeff_ctx_base = h->cabac_state
5187 + significant_coeff_flag_offset[MB_FIELD][cat];
5188 last_coeff_ctx_base = h->cabac_state
5189 + last_coeff_flag_offset[MB_FIELD][cat];
5190 abs_level_m1_ctx_base = h->cabac_state
5191 + coeff_abs_level_m1_offset[cat];
/* --- significance map: record the scan positions of nonzero coeffs --- */
5193 if( !is_dc && cat == 5 ) {
5194 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5195 for(last= 0; last < coefs; last++) { \
5196 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5197 if( get_cabac( CC, sig_ctx )) { \
5198 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5199 index[coeff_count++] = last; \
5200 if( get_cabac( CC, last_ctx ) ) { \
/* the final scan position is implicitly significant if reached */
5206 if( last == max_coeff -1 ) {\
5207 index[coeff_count++] = last;\
5209 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5210 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5211 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5213 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5215 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5217 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5220 assert(coeff_count > 0);
/* record coded-ness: DC flags go into cbp_table, AC counts into the cache */
5224 h->cbp_table[h->mb_xy] |= 0x100;
5226 h->cbp_table[h->mb_xy] |= 0x40 << n;
5229 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5231 assert( cat == 1 || cat == 2 || cat == 4 );
5232 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* --- levels, decoded from the last significant coefficient backwards --- */
5237 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5239 int j= scantable[index[--coeff_count]];
5241 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: just the bypass sign bit */
5242 node_ctx = coeff_abs_level_transition[0][node_ctx];
5244 block[j] = get_cabac_bypass_sign( CC, -1);
5246 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5250 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5251 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level| >= 2: unary up to 14, then bypass Exp-Golomb escape */
5253 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5257 if( coeff_abs >= 15 ) {
5259 while( get_cabac_bypass( CC ) ) {
5265 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5271 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5273 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5276 } while( coeff_count );
5277 #ifdef CABAC_ON_STACK
5278 h->cabac.range     = cc.range     ;
5279 h->cabac.low       = cc.low       ;
5280 h->cabac.bytestream= cc.bytestream;
5285 #ifndef CONFIG_SMALL
/* DC-specialized wrapper: forwards to decode_cabac_residual_internal() with
 * is_dc hard-wired to 1 so the compiler can specialize away the is_dc tests.
 * (Built on the non-CONFIG_SMALL path; see the #ifndef above.) */
5286 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5287     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* AC/non-DC specialized wrapper: forwards to decode_cabac_residual_internal()
 * with is_dc hard-wired to 0 (counterpart of decode_cabac_residual_dc). */
5290 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5291     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher for CABAC residual decoding. Categories 0 (luma DC of an
 * Intra16x16 MB) and 3 (chroma DC) are the DC block categories; all others
 * are AC/4x4/8x8 blocks. The two call forms below are alternatives selected
 * by preprocessor conditionals (CONFIG_SMALL) whose #if lines are not
 * visible in this excerpt: the small build calls the generic internal
 * routine directly, the normal build dispatches to the dc/nondc
 * specializations. */
5295 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5297     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5299     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5300     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the macroblock addresses of the top and left neighbors of the
 * current MB (h->top_mb_xy, h->left_mb_xy[0]) in h->s.mb_stride units.
 * Starts from the plain progressive-frame neighbors and then adjusts them
 * for MBAFF frame/field macroblock pairs and for field pictures.
 * NOTE(review): several lines of the MBAFF branch (the enclosing
 * if(FRAME_MBAFF) and some conditionals) are not visible in this excerpt. */
5304 static inline void compute_mb_neighbors(H264Context *h)
5306     MpegEncContext * const s = &h->s;
5307     const int mb_xy = h->mb_xy;
     /* default (non-MBAFF) neighbors: directly above and to the left */
5308     h->top_mb_xy = mb_xy - s->mb_stride;
5309     h->left_mb_xy[0] = mb_xy - 1;
     /* MBAFF: neighbors are defined relative to the MB *pair*; whether the
      * current/top/left pairs are frame- or field-coded changes which MB of
      * the pair is the real neighbor. */
5311         const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5312         const int top_pair_xy = pair_xy - s->mb_stride;
5313         const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5314         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5315         const int curr_mb_frame_flag = !MB_FIELD;
5316         const int bottom = (s->mb_y & 1);
5318             ? !curr_mb_frame_flag // bottom macroblock
5319             : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5321             h->top_mb_xy -= s->mb_stride;
     /* left pair coded differently from current MB -> left neighbor is the
      * corresponding MB of the left pair */
5323         if (left_mb_frame_flag != curr_mb_frame_flag) {
5324             h->left_mb_xy[0] = pair_xy - 1;
     /* field picture: rows are one field apart, so "above" is one more
      * stride up */
5326     } else if (FIELD_PICTURE) {
5327         h->top_mb_xy -= s->mb_stride;
5333 * decodes a macroblock
5334 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 *
 * Full CABAC macroblock decode: skip flags, MBAFF field flag, mb_type,
 * intra prediction modes or motion vectors/refs, CBP, transform size,
 * QP delta and all residual coefficient blocks. Reads from h->cabac and
 * writes the per-MB tables (cbp_table, mb_type, qscale_table, mv/ref
 * caches, non_zero_count). The order of the decode_cabac_* calls mirrors
 * the bitstream syntax and must not be changed.
5336 static int decode_mb_cabac(H264Context *h) {
5337 MpegEncContext * const s = &h->s;
5339 int mb_type, partition_count, cbp = 0;
5340 int dct8x8_allowed= h->pps.transform_8x8_mode;
5342 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5344 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
 /* ---- skip flag (only coded in P/B slices) ---- */
5345 if( h->slice_type_nos != FF_I_TYPE ) {
5347 /* a skipped mb needs the aff flag from the following mb */
5348 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5349 predict_field_decoding_flag(h);
 /* bottom MB of a pair whose top was skipped: its skip flag was already
  * read ahead (h->next_mb_skipped) when the top MB was processed */
5350 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5351 skip = h->next_mb_skipped;
5353 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5354 /* read skip flags */
5356 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5357 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
 /* peek at the next MB's skip flag; if it is not skipped, the field
  * decoding flag follows it in the bitstream and is read here */
5358 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5359 if(!h->next_mb_skipped)
5360 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
 /* skipped MB: clear per-MB state and return via the skip path */
5365 h->cbp_table[mb_xy] = 0;
5366 h->chroma_pred_mode_table[mb_xy] = 0;
5367 h->last_qscale_diff = 0;
 /* non-skipped MBAFF top MB: read the field decoding flag now */
5374 if( (s->mb_y&1) == 0 )
5376 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5379 h->prev_mb_skipped = 0;
 /* ---- mb_type ---- */
5381 compute_mb_neighbors(h);
5382 mb_type = decode_cabac_mb_type( h );
5383 assert(mb_type >= 0);
 /* map the raw coded mb_type index through the per-slice-type tables;
  * large indices fall through to the intra part of the table */
5385 if( h->slice_type_nos == FF_B_TYPE ) {
5387 partition_count= b_mb_type_info[mb_type].partition_count;
5388 mb_type= b_mb_type_info[mb_type].type;
5391 goto decode_intra_mb;
5393 } else if( h->slice_type_nos == FF_P_TYPE ) {
5395 partition_count= p_mb_type_info[mb_type].partition_count;
5396 mb_type= p_mb_type_info[mb_type].type;
5399 goto decode_intra_mb;
5402 if(h->slice_type == FF_SI_TYPE && mb_type)
5404 assert(h->slice_type_nos == FF_I_TYPE);
 /* intra MB: partition_count 0, cbp/pred mode come from the table */
5406 partition_count = 0;
5407 cbp= i_mb_type_info[mb_type].cbp;
5408 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5409 mb_type= i_mb_type_info[mb_type].type;
5412 mb_type |= MB_TYPE_INTERLACED;
5414 h->slice_table[ mb_xy ]= h->slice_num;
 /* ---- IPCM: raw samples follow, bypassing CABAC entirely ---- */
5416 if(IS_INTRA_PCM(mb_type)) {
5419 // We assume these blocks are very rare so we do not optimize it.
5420 // FIXME The two following lines get the bitstream position in the cabac
5421 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5422 ptr= h->cabac.bytestream;
5423 if(h->cabac.low&0x1) ptr--;
5425 if(h->cabac.low&0x1FF) ptr--;
5428 // The pixels are stored in the same order as levels in h->mb array.
5429 memcpy(h->mb, ptr, 256); ptr+=256;
5431 memcpy(h->mb+128, ptr, 128); ptr+=128;
 /* restart the CABAC decoder after the raw PCM bytes */
5434 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5436 // All blocks are present
5437 h->cbp_table[mb_xy] = 0x1ef;
5438 h->chroma_pred_mode_table[mb_xy] = 0;
5439 // In deblocking, the quantizer is 0
5440 s->current_picture.qscale_table[mb_xy]= 0;
5441 // All coeffs are present
5442 memset(h->non_zero_count[mb_xy], 16, 16);
5443 s->current_picture.mb_type[mb_xy]= mb_type;
5444 h->last_qscale_diff = 0;
 /* ref_count is doubled here and halved again near the end of this
  * function (see the >>= 1 below) — presumably the MBAFF field-ref
  * adjustment; confirm against the enclosing (not visible) condition */
5449 h->ref_count[0] <<= 1;
5450 h->ref_count[1] <<= 1;
5453 fill_caches(h, mb_type, 0);
 /* ---- intra prediction modes ---- */
5455 if( IS_INTRA( mb_type ) ) {
5457 if( IS_INTRA4x4( mb_type ) ) {
 /* 8x8 transform: one pred mode per 8x8 block, replicated to 2x2 cache */
5458 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5459 mb_type |= MB_TYPE_8x8DCT;
5460 for( i = 0; i < 16; i+=4 ) {
5461 int pred = pred_intra_mode( h, i );
5462 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5463 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5466 for( i = 0; i < 16; i++ ) {
5467 int pred = pred_intra_mode( h, i );
5468 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5470 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5473 write_back_intra_pred_mode(h);
5474 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5476 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5477 if( h->intra16x16_pred_mode < 0 ) return -1;
 /* chroma prediction mode, validated against available neighbors */
5480 h->chroma_pred_mode_table[mb_xy] =
5481 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5483 pred_mode= check_intra_pred_mode( h, pred_mode );
5484 if( pred_mode < 0 ) return -1;
5485 h->chroma_pred_mode= pred_mode;
 /* ---- 8x8 sub-macroblock partitions (P_8x8 / B_8x8) ---- */
5487 } else if( partition_count == 4 ) {
5488 int i, j, sub_partition_count[4], list, ref[2][4];
5490 if( h->slice_type_nos == FF_B_TYPE ) {
5491 for( i = 0; i < 4; i++ ) {
5492 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5493 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5494 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
 /* any direct 8x8 sub-block -> derive its motion from co-located MB */
5496 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5497 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5498 pred_direct_motion(h, &mb_type);
5499 h->ref_cache[0][scan8[4]] =
5500 h->ref_cache[1][scan8[4]] =
5501 h->ref_cache[0][scan8[12]] =
5502 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5503 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5504 for( i = 0; i < 4; i++ )
5505 if( IS_DIRECT(h->sub_mb_type[i]) )
5506 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5510 for( i = 0; i < 4; i++ ) {
5511 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5512 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5513 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
 /* reference indices for each non-direct 8x8 block, per list */
5517 for( list = 0; list < h->list_count; list++ ) {
5518 for( i = 0; i < 4; i++ ) {
5519 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5520 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5521 if( h->ref_count[list] > 1 ){
5522 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
 /* bitstream sanity check: reject out-of-range reference indices */
5523 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5524 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5532 h->ref_cache[list][ scan8[4*i]+1 ]=
5533 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5538 dct8x8_allowed = get_dct8x8_allowed(h);
 /* motion vector differences for every sub-partition */
5540 for(list=0; list<h->list_count; list++){
5542 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5543 if(IS_DIRECT(h->sub_mb_type[i])){
5544 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5548 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5549 const int sub_mb_type= h->sub_mb_type[i];
5550 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5551 for(j=0; j<sub_partition_count[i]; j++){
5554 const int index= 4*i + block_width*j;
5555 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5556 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
 /* mv = predictor + coded difference */
5557 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5559 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5560 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5561 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 /* replicate mv/mvd into the cache cells covered by the sub-block
  * shape (8x8 fills 2x2 cells, 8x4/4x8 fill 2, 4x4 fills 1) */
5563 if(IS_SUB_8X8(sub_mb_type)){
5565 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5567 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5570 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5572 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5573 }else if(IS_SUB_8X4(sub_mb_type)){
5574 mv_cache[ 1 ][0]= mx;
5575 mv_cache[ 1 ][1]= my;
5577 mvd_cache[ 1 ][0]= mx - mpx;
5578 mvd_cache[ 1 ][1]= my - mpy;
5579 }else if(IS_SUB_4X8(sub_mb_type)){
5580 mv_cache[ 8 ][0]= mx;
5581 mv_cache[ 8 ][1]= my;
5583 mvd_cache[ 8 ][0]= mx - mpx;
5584 mvd_cache[ 8 ][1]= my - mpy;
5586 mv_cache[ 0 ][0]= mx;
5587 mv_cache[ 0 ][1]= my;
5589 mvd_cache[ 0 ][0]= mx - mpx;
5590 mvd_cache[ 0 ][1]= my - mpy;
 /* unused list for this block: zero the 2x2 mv/mvd cache cells */
5593 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5594 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5595 p[0] = p[1] = p[8] = p[9] = 0;
5596 pd[0]= pd[1]= pd[8]= pd[9]= 0;
 /* ---- B_DIRECT_16x16 ---- */
5600 } else if( IS_DIRECT(mb_type) ) {
5601 pred_direct_motion(h, &mb_type);
5602 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5603 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5604 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
 /* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
5606 int list, mx, my, i, mpx, mpy;
5607 if(IS_16X16(mb_type)){
5608 for(list=0; list<h->list_count; list++){
5609 if(IS_DIR(mb_type, 0, list)){
5611 if(h->ref_count[list] > 1){
5612 ref= decode_cabac_mb_ref(h, list, 0);
5613 if(ref >= (unsigned)h->ref_count[list]){
5614 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5619 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5621 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5623 for(list=0; list<h->list_count; list++){
5624 if(IS_DIR(mb_type, 0, list)){
5625 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5627 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5628 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5629 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5631 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5632 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5634 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
 /* 16x8: two horizontal halves, refs first then mvds (bitstream order) */
5637 else if(IS_16X8(mb_type)){
5638 for(list=0; list<h->list_count; list++){
5640 if(IS_DIR(mb_type, i, list)){
5642 if(h->ref_count[list] > 1){
5643 ref= decode_cabac_mb_ref( h, list, 8*i );
5644 if(ref >= (unsigned)h->ref_count[list]){
5645 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5650 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5652 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5655 for(list=0; list<h->list_count; list++){
5657 if(IS_DIR(mb_type, i, list)){
5658 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5659 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5660 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5661 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5663 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5664 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5666 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5667 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
 /* 8x16: two vertical halves */
5672 assert(IS_8X16(mb_type));
5673 for(list=0; list<h->list_count; list++){
5675 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5677 if(h->ref_count[list] > 1){
5678 ref= decode_cabac_mb_ref( h, list, 4*i );
5679 if(ref >= (unsigned)h->ref_count[list]){
5680 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5685 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5687 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5690 for(list=0; list<h->list_count; list++){
5692 if(IS_DIR(mb_type, i, list)){
5693 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5694 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5695 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5697 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5698 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5699 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5701 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5702 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5709 if( IS_INTER( mb_type ) ) {
5710 h->chroma_pred_mode_table[mb_xy] = 0;
5711 write_back_motion( h, mb_type );
 /* ---- coded block pattern (Intra16x16 derives cbp from mb_type) ---- */
5714 if( !IS_INTRA16x16( mb_type ) ) {
5715 cbp = decode_cabac_mb_cbp_luma( h );
5717 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5720 h->cbp_table[mb_xy] = h->cbp = cbp;
 /* transform_size_8x8_flag is coded only when some luma block is coded */
5722 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5723 if( decode_cabac_mb_transform_size( h ) )
5724 mb_type |= MB_TYPE_8x8DCT;
5726 s->current_picture.mb_type[mb_xy]= mb_type;
 /* ---- residuals ---- */
5728 if( cbp || IS_INTRA16x16( mb_type ) ) {
5729 const uint8_t *scan, *scan8x8, *dc_scan;
5730 const uint32_t *qmul;
 /* pick field or frame scan tables (q0 variants when qscale == 0) */
5733 if(IS_INTERLACED(mb_type)){
5734 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5735 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5736 dc_scan= luma_dc_field_scan;
5738 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5739 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5740 dc_scan= luma_dc_zigzag_scan;
 /* mb_qp_delta; INT_MIN signals a decode failure */
5743 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5744 if( dqp == INT_MIN ){
5745 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
 /* wrap qscale back into the legal 0..51 range (mod-52 arithmetic) */
5749 if(((unsigned)s->qscale) > 51){
5750 if(s->qscale<0) s->qscale+= 52;
5751 else s->qscale-= 52;
5753 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5754 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
 /* Intra16x16: separate luma DC block (cat 0) + 15-coeff AC blocks (cat 1) */
5756 if( IS_INTRA16x16( mb_type ) ) {
5758 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5759 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5762 qmul = h->dequant4_coeff[0][s->qscale];
5763 for( i = 0; i < 16; i++ ) {
5764 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5765 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5768 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
 /* other MB types: per-8x8 luma blocks, either one 8x8 (cat 5) or four
  * 4x4 (cat 2) residual blocks each, gated by the cbp bit */
5772 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5773 if( cbp & (1<<i8x8) ) {
5774 if( IS_8x8DCT(mb_type) ) {
5775 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5776 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5778 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5779 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5780 const int index = 4*i8x8 + i4x4;
5781 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5783 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5784 //STOP_TIMER("decode_residual")
5788 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5789 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
 /* chroma DC (cat 3) then chroma AC (cat 4) per component */
5796 for( c = 0; c < 2; c++ ) {
5797 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5798 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5804 for( c = 0; c < 2; c++ ) {
5805 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5806 for( i = 0; i < 4; i++ ) {
5807 const int index = 16 + 4 * c + i;
5808 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5809 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5813 uint8_t * const nnz= &h->non_zero_count_cache[0];
5814 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5815 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
 /* cbp == 0 and not Intra16x16: no residuals, clear all nnz counters */
5818 uint8_t * const nnz= &h->non_zero_count_cache[0];
5819 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5820 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5821 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5822 h->last_qscale_diff = 0;
5825 s->current_picture.qscale_table[mb_xy]= s->qscale;
5826 write_back_non_zero_count(h);
 /* undo the ref_count doubling done above */
5829 h->ref_count[0] >>= 1;
5830 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (horizontal filtering across it) using
 * the DSP loop-filter entry points. alpha/beta thresholds are looked up
 * from qp (tables are biased by +52 so negative qp offsets index safely);
 * tc0 is looked up per-edge-segment from bS. The _intra variant call below
 * is the strong filter path — the branch selecting between the two is not
 * visible in this excerpt (presumably bS[0] < 4 vs == 4; confirm). */
5837 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5838 const int index_a = qp + h->slice_alpha_c0_offset;
5839 const int alpha = (alpha_table+52)[index_a];
5840 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5844 tc[0] = (tc0_table+52)[index_a][bS[0]];
5845 tc[1] = (tc0_table+52)[index_a][bS[1]];
5846 tc[2] = (tc0_table+52)[index_a][bS[2]];
5847 tc[3] = (tc0_table+52)[index_a][bS[3]];
5848 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5850 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge. Same structure as filter_mb_edgev,
 * but the tc values are tc0+1 (the chroma filter convention used by the
 * DSP routines) and the chroma DSP entry points are called. */
5853 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5854 const int index_a = qp + h->slice_alpha_c0_offset;
5855 const int alpha = (alpha_table+52)[index_a];
5856 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5860 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5861 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5862 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5863 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5864 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5866 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Vertical luma edge deblocking for MBAFF macroblock pairs, done per pixel
 * row in C rather than via the DSP helpers: with MBAFF the quantizer (qp[2])
 * and boundary strength (bS[8]) can differ row by row, so each of the 16
 * rows picks its own qp/bS/alpha/beta. bS < 4 is the normal clipped filter,
 * bS == 4 the strong (intra-edge) filter per H.264 8.7. */
5870 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5872 for( i = 0; i < 16; i++, pix += stride) {
5878 int bS_index = (i >> 1);
5881 bS_index |= (i & 1);
 /* bS 0: this row is not filtered */
5884 if( bS[bS_index] == 0 ) {
 /* field MBs: qp alternates per field (i>>3); frame MBs: per row parity */
5888 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5889 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5890 alpha = (alpha_table+52)[index_a];
5891 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
 /* normal filter: clip the delta to +-tc derived from tc0 */
5893 if( bS[bS_index] < 4 ) {
5894 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5895 const int p0 = pix[-1];
5896 const int p1 = pix[-2];
5897 const int p2 = pix[-3];
5898 const int q0 = pix[0];
5899 const int q1 = pix[1];
5900 const int q2 = pix[2];
5902 if( FFABS( p0 - q0 ) < alpha &&
5903 FFABS( p1 - p0 ) < beta &&
5904 FFABS( q1 - q0 ) < beta ) {
 /* p1/q1 are additionally corrected when the second neighbor is flat */
5908 if( FFABS( p2 - p0 ) < beta ) {
5909 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5912 if( FFABS( q2 - q0 ) < beta ) {
5913 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5917 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5918 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5919 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5920 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
 /* strong filter (bS == 4): full 3-tap smoothing of up to 3 pixels each side */
5923 const int p0 = pix[-1];
5924 const int p1 = pix[-2];
5925 const int p2 = pix[-3];
5927 const int q0 = pix[0];
5928 const int q1 = pix[1];
5929 const int q2 = pix[2];
5931 if( FFABS( p0 - q0 ) < alpha &&
5932 FFABS( p1 - p0 ) < beta &&
5933 FFABS( q1 - q0 ) < beta ) {
 /* extra flatness test decides 3-pixel vs 1-pixel smoothing */
5935 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5936 if( FFABS( p2 - p0 ) < beta)
5938 const int p3 = pix[-4];
5940 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5941 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5942 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5945 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5947 if( FFABS( q2 - q0 ) < beta)
5949 const int q3 = pix[3];
5951 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5952 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5953 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5956 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5960 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5961 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5963 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Vertical chroma edge deblocking for MBAFF pairs: per-row C filter like
 * filter_mb_mbaff_edgev, but over 8 chroma rows and only touching p0/q0
 * (chroma filtering modifies at most one pixel on each side). tc is tc0+1
 * as in the other chroma paths. */
5968 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5970 for( i = 0; i < 8; i++, pix += stride) {
5978 if( bS[bS_index] == 0 ) {
 /* field MBs: qp per field (i>>2); frame MBs: per row parity */
5982 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5983 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5984 alpha = (alpha_table+52)[index_a];
5985 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
 /* normal filter: clipped delta applied to p0/q0 only */
5987 if( bS[bS_index] < 4 ) {
5988 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5989 const int p0 = pix[-1];
5990 const int p1 = pix[-2];
5991 const int q0 = pix[0];
5992 const int q1 = pix[1];
5994 if( FFABS( p0 - q0 ) < alpha &&
5995 FFABS( p1 - p0 ) < beta &&
5996 FFABS( q1 - q0 ) < beta ) {
5997 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5999 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6000 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6001 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
 /* strong filter (bS == 4): fixed 2-tap smoothing of p0/q0 */
6004 const int p0 = pix[-1];
6005 const int p1 = pix[-2];
6006 const int q0 = pix[0];
6007 const int q1 = pix[1];
6009 if( FFABS( p0 - q0 ) < alpha &&
6010 FFABS( p1 - p0 ) < beta &&
6011 FFABS( q1 - q0 ) < beta ) {
6013 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6014 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6015 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (vertical filtering across it).
 * Mirror of filter_mb_edgev using the h264_v_* DSP entry points. */
6021 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6022 const int index_a = qp + h->slice_alpha_c0_offset;
6023 const int alpha = (alpha_table+52)[index_a];
6024 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6028 tc[0] = (tc0_table+52)[index_a][bS[0]];
6029 tc[1] = (tc0_table+52)[index_a][bS[1]];
6030 tc[2] = (tc0_table+52)[index_a][bS[2]];
6031 tc[3] = (tc0_table+52)[index_a][bS[3]];
6032 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6034 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge. Mirror of filter_mb_edgecv
 * (tc = tc0+1) using the vertical-direction chroma DSP entry points. */
6038 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6039 const int index_a = qp + h->slice_alpha_c0_offset;
6040 const int alpha = (alpha_table+52)[index_a];
6041 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6045 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6046 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6047 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6048 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6049 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6051 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock. Handles only the common cases
 * (non-MBAFF, interior MBs, no per-plane chroma qp diff); everything else
 * is delegated to the full filter_mb(). When all averaged quantizers are
 * below qp_thresh the alpha threshold cannot trigger, so the whole MB is
 * skipped. Intra MBs use fixed bS patterns; inter MBs compute bS via the
 * DSP h264_loop_filter_strength helper. */
6055 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6056 MpegEncContext * const s = &h->s;
6057 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6059 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
 /* fall back to the full filter for edge MBs / unsupported configurations */
6063 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6064 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6065 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6066 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6067 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6070 assert(!FRAME_MBAFF);
 /* average the qp with each neighbor for the left (qp0) and top (qp1) edges */
6072 mb_type = s->current_picture.mb_type[mb_xy];
6073 qp = s->current_picture.qscale_table[mb_xy];
6074 qp0 = s->current_picture.qscale_table[mb_xy-1];
6075 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6076 qpc = get_chroma_qp( h, 0, qp );
6077 qpc0 = get_chroma_qp( h, 0, qp0 );
6078 qpc1 = get_chroma_qp( h, 0, qp1 );
6079 qp0 = (qp + qp0 + 1) >> 1;
6080 qp1 = (qp + qp1 + 1) >> 1;
6081 qpc0 = (qpc + qpc0 + 1) >> 1;
6082 qpc1 = (qpc + qpc1 + 1) >> 1;
 /* below this threshold alpha/beta are zero -> filtering is a no-op */
6083 qp_thresh = 15 - h->slice_alpha_c0_offset;
6084 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6085 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
 /* intra MB: bS is fixed (4 on MB edges, 3 inside; 3 on the top edge of
  * field pictures) so the filter calls can be issued directly */
6088 if( IS_INTRA(mb_type) ) {
6089 int16_t bS4[4] = {4,4,4,4};
6090 int16_t bS3[4] = {3,3,3,3};
6091 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
 /* 8x8 transform: only edges 0 and 2 exist for luma */
6092 if( IS_8x8DCT(mb_type) ) {
6093 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6094 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6095 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6096 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6098 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6099 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6100 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6101 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6102 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6103 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6104 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6105 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6107 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6108 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6109 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6110 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6111 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6112 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6113 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6114 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
 /* inter MB: bS[dir][edge] computed by the DSP strength helper from
  * nnz, refs and motion vectors; bSv views each bS row as one uint64 */
6117 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6118 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
 /* fully-coded 8x8-transform MB: internal edges all get bS 2 */
6120 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6122 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6124 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6125 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6126 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6127 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6129 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6130 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6131 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6132 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
 /* intra neighbors force bS 4 (3 on field-picture top edges) */
6134 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6135 bSv[0][0] = 0x0004000400040004ULL;
6136 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6137 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
 /* apply the computed strengths; edge 0 uses the averaged neighbor qp */
6139 #define FILTER(hv,dir,edge)\
6140 if(bSv[dir][edge]) {\
6141 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6143 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6144 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6150 } else if( IS_8x8DCT(mb_type) ) {
6170 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6171 MpegEncContext * const s = &h->s;
6173 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6174 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6175 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6176 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6177 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6179 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6180 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6181 // how often to recheck mv-based bS when iterating between edges
6182 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6183 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6184 // how often to recheck mv-based bS when iterating along each edge
6185 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6187 if (first_vertical_edge_done) {
6191 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6194 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6195 && !IS_INTERLACED(mb_type)
6196 && IS_INTERLACED(mbm_type)
6198 // This is a special case in the norm where the filtering must
6199 // be done twice (one each of the field) even if we are in a
6200 // frame macroblock.
6202 static const int nnz_idx[4] = {4,5,6,3};
6203 unsigned int tmp_linesize = 2 * linesize;
6204 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6205 int mbn_xy = mb_xy - 2 * s->mb_stride;
6210 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6211 if( IS_INTRA(mb_type) ||
6212 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6213 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6215 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6216 for( i = 0; i < 4; i++ ) {
6217 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6218 mbn_nnz[nnz_idx[i]] != 0 )
6224 // Do not use s->qscale as luma quantizer because it has not the same
6225 // value in IPCM macroblocks.
6226 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6227 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6228 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6229 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6230 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6231 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6232 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6233 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6240 for( edge = start; edge < edges; edge++ ) {
6241 /* mbn_xy: neighbor macroblock */
6242 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6243 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6244 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6248 if( (edge&1) && IS_8x8DCT(mb_type) )
6251 if( IS_INTRA(mb_type) ||
6252 IS_INTRA(mbn_type) ) {
6255 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6256 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6265 bS[0] = bS[1] = bS[2] = bS[3] = value;
6270 if( edge & mask_edge ) {
6271 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6274 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6275 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6278 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6279 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6280 int bn_idx= b_idx - (dir ? 8:1);
6283 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6284 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6285 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6286 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6289 if(h->slice_type_nos == FF_B_TYPE && v){
6291 for( l = 0; !v && l < 2; l++ ) {
6293 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6294 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6295 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6299 bS[0] = bS[1] = bS[2] = bS[3] = v;
6305 for( i = 0; i < 4; i++ ) {
6306 int x = dir == 0 ? edge : i;
6307 int y = dir == 0 ? i : edge;
6308 int b_idx= 8 + 4 + x + 8*y;
6309 int bn_idx= b_idx - (dir ? 8:1);
6311 if( h->non_zero_count_cache[b_idx] |
6312 h->non_zero_count_cache[bn_idx] ) {
6318 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6319 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6320 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6321 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6327 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6329 for( l = 0; l < 2; l++ ) {
6331 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6332 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6333 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6342 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6347 // Do not use s->qscale as luma quantizer because it has not the same
6348 // value in IPCM macroblocks.
6349 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6350 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6351 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6352 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6354 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6355 if( (edge&1) == 0 ) {
6356 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6357 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6358 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6359 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6362 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6363 if( (edge&1) == 0 ) {
6364 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6365 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6366 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6367 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Apply the H.264 in-loop deblocking filter to one macroblock.
 * Handles the qp-threshold early-out, CAVLC 8x8dct NNZ fixup, the special
 * MBAFF first-vertical-edge case, then delegates per-direction filtering
 * to filter_mb_dir().
 * NOTE(review): several interior lines are elided in this excerpt, so some
 * conditionals below appear without their opening `if(` line.
 */
6373 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6374 MpegEncContext * const s = &h->s;
6375 const int mb_xy= mb_x + mb_y*s->mb_stride;
6376 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical MV threshold (2 instead of 4) */
6377 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6378 int first_vertical_edge_done = 0;
6381 //for sufficiently low qp, filtering wouldn't do anything
6382 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6384 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6385 int qp = s->current_picture.qscale_table[mb_xy];
/* early-out: averaged qp with left/top neighbour below threshold means no filtering */
6387 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6388 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6393 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6394 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6395 int top_type, left_type[2];
6396 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6397 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6398 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the top-row NNZ cache entries from the neighbour's cbp bits */
6400 if(IS_8x8DCT(top_type)){
6401 h->non_zero_count_cache[4+8*0]=
6402 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6403 h->non_zero_count_cache[6+8*0]=
6404 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6406 if(IS_8x8DCT(left_type[0])){
6407 h->non_zero_count_cache[3+8*1]=
6408 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6410 if(IS_8x8DCT(left_type[1])){
6411 h->non_zero_count_cache[3+8*3]=
6412 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* current MB: each 8x8 quadrant's four 4x4 NNZ entries come from one cbp bit */
6415 if(IS_8x8DCT(mb_type)){
6416 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6417 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6419 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6420 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6422 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6423 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6425 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6426 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6431 // left mb is in picture
6432 && h->slice_table[mb_xy-1] != 0xFFFF
6433 // and current and left pair do not have the same interlaced type
6434 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6435 // and left mb is in the same slice if deblocking_filter == 2
6436 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6437 /* First vertical edge is different in MBAFF frames
6438 * There are 8 different bS to compute and 2 different Qp
6440 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6441 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6446 int mb_qp, mbn0_qp, mbn1_qp;
6448 first_vertical_edge_done = 1;
/* intra MB: strongest boundary strength on all 8 sub-edges */
6450 if( IS_INTRA(mb_type) )
6451 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6453 for( i = 0; i < 8; i++ ) {
6454 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6456 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6458 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6459 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6460 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6462 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average luma/chroma qp between this MB and each of the two left-pair MBs */
6469 mb_qp = s->current_picture.qscale_table[mb_xy];
6470 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6471 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6472 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6473 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6474 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6475 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6476 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6477 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6478 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6479 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6480 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6481 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6484 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6485 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6486 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6487 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6488 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* normal path: filter vertical (dir=0) then horizontal (dir=1) edges */
6492 for( dir = 0; dir < 2; dir++ )
6493 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6495 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6496 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode the macroblocks of one slice (per-thread entry point).
 * arg points at an H264Context* (thread context). Dispatches to the
 * CABAC, CAVLC or generic decode_mb loops, reporting decoded regions
 * to the error concealment via ff_er_add_slice().
 * Returns 0 on slice end, -1 on error (final return is unreachable).
 * NOTE(review): several interior lines are elided in this excerpt.
 */
6500 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6501 H264Context *h = *(void**)arg;
6502 MpegEncContext * const s = &h->s;
6503 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6507 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6508 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6510 if( h->pps.cabac ) {
/* CABAC is byte-aligned; realign the bit reader before handing over */
6514 align_get_bits( &s->gb );
6517 ff_init_cabac_states( &h->cabac);
6518 ff_init_cabac_decoder( &h->cabac,
6519 s->gb.buffer + get_bits_count(&s->gb)/8,
6520 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6521 /* calculate pre-state */
6522 for( i= 0; i < 460; i++ ) {
6524 if( h->slice_type_nos == FF_I_TYPE )
6525 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6527 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6530 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6532 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6537 int ret = decode_mb_cabac(h);
6539 //STOP_TIMER("decode_mb_cabac")
6541 if(ret>=0) hl_decode_mb(h);
6543 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6546 ret = decode_mb_cabac(h);
6548 if(ret>=0) hl_decode_mb(h);
6551 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun of more than 2 bytes means the stream is broken */
6553 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6554 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6555 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6559 if( ++s->mb_x >= s->mb_width ) {
6561 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6563 if(FIELD_OR_MBAFF_PICTURE) {
6568 if( eos || s->mb_y >= s->mb_height ) {
6569 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6570 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6577 int ret = decode_mb_cavlc(h);
6579 if(ret>=0) hl_decode_mb(h);
6581 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6583 ret = decode_mb_cavlc(h);
6585 if(ret>=0) hl_decode_mb(h);
6590 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6591 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6596 if(++s->mb_x >= s->mb_width){
6598 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6600 if(FIELD_OR_MBAFF_PICTURE) {
6603 if(s->mb_y >= s->mb_height){
6604 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6606 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6607 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6611 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* consuming all bits with no pending skip-run marks a clean slice end */
6618 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6619 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6620 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6621 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6625 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* generic decode_mb path */
6634 for(;s->mb_y < s->mb_height; s->mb_y++){
6635 for(;s->mb_x < s->mb_width; s->mb_x++){
6636 int ret= decode_mb(h);
6641 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6642 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6647 if(++s->mb_x >= s->mb_width){
6649 if(++s->mb_y >= s->mb_height){
6650 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6651 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6655 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* fixed: stray '?' characters corrupted this line (was "s->?gb" / "s->gb?.size_in_bits");
 * NOTE(review): this path passes s->gb without '&' unlike the lines above — verify
 * against the original tree, the same form appears on the surrounding lines too */
6662 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6663 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6664 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6668 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6675 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6678 return -1; //not reached
/**
 * Parse a picture timing SEI message (H.264 spec section D.1.2/D.2.2).
 * Skips HRD delay fields if present, reads sei_pic_struct, then skips the
 * per-clock-timestamp fields; nothing but h->sei_pic_struct is retained.
 */
6681 static int decode_picture_timing(H264Context *h){
6682 MpegEncContext * const s = &h->s;
6683 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6684 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6685 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6687 if(h->sps.pic_struct_present_flag){
6688 unsigned int i, num_clock_ts;
6689 h->sei_pic_struct = get_bits(&s->gb, 4);
/* values above FRAME_TRIPLING are reserved per the spec */
6691 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6694 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6696 for (i = 0 ; i < num_clock_ts ; i++){
6697 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6698 unsigned int full_timestamp_flag;
6699 skip_bits(&s->gb, 2); /* ct_type */
6700 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6701 skip_bits(&s->gb, 5); /* counting_type */
6702 full_timestamp_flag = get_bits(&s->gb, 1);
6703 skip_bits(&s->gb, 1); /* discontinuity_flag */
6704 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6705 skip_bits(&s->gb, 8); /* n_frames */
6706 if(full_timestamp_flag){
6707 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6708 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6709 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* without a full timestamp the sec/min/hour fields are each optional */
6711 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6712 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6713 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6714 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6715 if(get_bits(&s->gb, 1)) /* hours_flag */
6716 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6720 if(h->sps.time_offset_length > 0)
6721 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user-data SEI message.
 * Reads up to sizeof(user_data)-1 bytes (16-byte UUID + payload text) and
 * scans the payload for an x264 version string to set h->x264_build,
 * which is used elsewhere for bug workarounds. Remaining bytes are skipped.
 */
6728 static int decode_unregistered_user_data(H264Context *h, int size){
6729 MpegEncContext * const s = &h->s;
6730 uint8_t user_data[16+256];
6736 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6737 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6741 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6742 if(e==1 && build>=0)
6743 h->x264_build= build;
6745 if(s->avctx->debug & FF_DEBUG_BUGS)
6746 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any user-data bytes beyond the local buffer */
6749 skip_bits(&s->gb, 8);
/**
 * Parse the SEI messages of an SEI NAL unit.
 * type and size are coded as runs of 0xFF bytes plus a final byte
 * (ff_coding per the spec); unknown message types are skipped.
 */
6754 static int decode_sei(H264Context *h){
6755 MpegEncContext * const s = &h->s;
6757 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payloadType: each 0xFF byte adds 255 */
6762 type+= show_bits(&s->gb, 8);
6763 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payloadSize the same way */
6767 size+= show_bits(&s->gb, 8);
6768 }while(get_bits(&s->gb, 8) == 255);
6771 case 1: // Picture timing SEI
6772 if(decode_picture_timing(h) < 0)
6776 if(decode_unregistered_user_data(h, size) < 0)
6780 skip_bits(&s->gb, 8*size);
6783 //FIXME check bits here
6784 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters from the VUI
 * (H.264 spec section E.1.2). Only the delay-length fields are kept on
 * the SPS; per-CPB rate/size values are read and discarded.
 */
6790 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6791 MpegEncContext * const s = &h->s;
6793 cpb_count = get_ue_golomb(&s->gb) + 1;
/* spec limit: cpb_cnt_minus1 is 0..31 */
6795 if(cpb_count > 32U){
6796 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6800 get_bits(&s->gb, 4); /* bit_rate_scale */
6801 get_bits(&s->gb, 4); /* cpb_size_scale */
6802 for(i=0; i<cpb_count; i++){
6803 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6804 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6805 get_bits1(&s->gb); /* cbr_flag */
6807 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* these lengths are needed later by decode_picture_timing() */
6808 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6809 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6810 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse VUI (video usability information) parameters from an SPS
 * (H.264 spec Annex E). Stores aspect ratio, timing info, HRD presence
 * flags and bitstream restrictions on the SPS; display-only fields
 * (overscan, colour description, chroma location) are read and discarded.
 */
6814 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6815 MpegEncContext * const s = &h->s;
6816 int aspect_ratio_info_present_flag;
6817 unsigned int aspect_ratio_idc;
6819 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6821 if( aspect_ratio_info_present_flag ) {
6822 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; otherwise use the table */
6823 if( aspect_ratio_idc == EXTENDED_SAR ) {
6824 sps->sar.num= get_bits(&s->gb, 16);
6825 sps->sar.den= get_bits(&s->gb, 16);
6826 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6827 sps->sar= pixel_aspect[aspect_ratio_idc];
6829 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6836 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6838 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6839 get_bits1(&s->gb); /* overscan_appropriate_flag */
6842 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6843 get_bits(&s->gb, 3); /* video_format */
6844 get_bits1(&s->gb); /* video_full_range_flag */
6845 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6846 get_bits(&s->gb, 8); /* colour_primaries */
6847 get_bits(&s->gb, 8); /* transfer_characteristics */
6848 get_bits(&s->gb, 8); /* matrix_coefficients */
6852 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6853 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6854 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6857 sps->timing_info_present_flag = get_bits1(&s->gb);
6858 if(sps->timing_info_present_flag){
6859 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6860 sps->time_scale = get_bits_long(&s->gb, 32);
6861 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6864 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6865 if(sps->nal_hrd_parameters_present_flag)
6866 if(decode_hrd_parameters(h, sps) < 0)
6868 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6869 if(sps->vcl_hrd_parameters_present_flag)
6870 if(decode_hrd_parameters(h, sps) < 0)
6872 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6873 get_bits1(&s->gb); /* low_delay_hrd_flag */
6874 sps->pic_struct_present_flag = get_bits1(&s->gb);
6876 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6877 if(sps->bitstream_restriction_flag){
6878 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6879 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6880 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6881 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6882 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6883 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6884 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* cap reorder depth; the unsigned compare also rejects negative ue values */
6886 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6887 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one scaling list (4x4 or 8x8) from the bitstream.
 * If the list is absent, copy fallback_list; if the first delta yields 0,
 * use the JVT default list; otherwise delta-decode values in zigzag order,
 * repeating the previous value when a delta of 0 is coded mid-list.
 */
6895 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6896 const uint8_t *jvt_list, const uint8_t *fallback_list){
6897 MpegEncContext * const s = &h->s;
6898 int i, last = 8, next = 8;
6899 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6900 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6901 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6903 for(i=0;i<size;i++){
/* delta-coded: each entry is previous value plus a signed golomb, mod 256 */
6905 next = (last + get_se_golomb(&s->gb)) & 0xff;
6906 if(!i && !next){ /* matrix not written, we use the preset one */
6907 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6910 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rule (spec table 7-2): a PPS falls back to the SPS matrices
 * when the SPS carried any, otherwise to the flat/JVT defaults; within a
 * set, each list falls back to the previously decoded list of its class.
 */
6914 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6915 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6916 MpegEncContext * const s = &h->s;
6917 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6918 const uint8_t *fallback[4] = {
6919 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6920 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6921 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6922 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6924 if(get_bits1(&s->gb)){
6925 sps->scaling_matrix_present |= is_sps;
6926 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6927 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6928 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6929 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6930 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6931 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only when 8x8 transform can occur */
6932 if(is_sps || pps->transform_8x8_mode){
6933 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6934 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set NAL unit (H.264 spec section 7.3.2.1)
 * into a freshly allocated SPS, validating ranges, then store it in
 * h->sps_buffers[sps_id] (replacing any previous SPS with the same id).
 * NOTE(review): several interior lines are elided in this excerpt.
 */
6939 static inline int decode_seq_parameter_set(H264Context *h){
6940 MpegEncContext * const s = &h->s;
6941 int profile_idc, level_idc;
6942 unsigned int sps_id;
6946 profile_idc= get_bits(&s->gb, 8);
6947 get_bits1(&s->gb); //constraint_set0_flag
6948 get_bits1(&s->gb); //constraint_set1_flag
6949 get_bits1(&s->gb); //constraint_set2_flag
6950 get_bits1(&s->gb); //constraint_set3_flag
6951 get_bits(&s->gb, 4); // reserved
6952 level_idc= get_bits(&s->gb, 8);
6953 sps_id= get_ue_golomb(&s->gb);
6955 if(sps_id >= MAX_SPS_COUNT) {
6956 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6959 sps= av_mallocz(sizeof(SPS));
6963 sps->profile_idc= profile_idc;
6964 sps->level_idc= level_idc;
/* default to flat (all-16) scaling matrices */
6966 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6967 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6968 sps->scaling_matrix_present = 0;
6970 if(sps->profile_idc >= 100){ //high profile
6971 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6972 if(sps->chroma_format_idc == 3)
6973 get_bits1(&s->gb); //residual_color_transform_flag
6974 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6975 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6976 sps->transform_bypass = get_bits1(&s->gb);
6977 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are implicitly 4:2:0 */
6979 sps->chroma_format_idc= 1;
6982 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6983 sps->poc_type= get_ue_golomb(&s->gb);
6985 if(sps->poc_type == 0){ //FIXME #define
6986 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6987 } else if(sps->poc_type == 1){//FIXME #define
6988 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6989 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6990 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6991 sps->poc_cycle_length = get_ue_golomb(&s->gb);
6993 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6994 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
6998 for(i=0; i<sps->poc_cycle_length; i++)
6999 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7000 }else if(sps->poc_type != 2){
7001 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7005 sps->ref_frame_count= get_ue_golomb(&s->gb);
7006 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7007 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7010 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7011 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7012 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* reject dimensions that would overflow 16*mb arithmetic */
7013 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7014 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7015 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7019 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7020 if(!sps->frame_mbs_only_flag)
7021 sps->mb_aff= get_bits1(&s->gb);
7025 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7027 #ifndef ALLOW_INTERLACE
7029 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7031 sps->crop= get_bits1(&s->gb);
7033 sps->crop_left = get_ue_golomb(&s->gb);
7034 sps->crop_right = get_ue_golomb(&s->gb);
7035 sps->crop_top = get_ue_golomb(&s->gb);
7036 sps->crop_bottom= get_ue_golomb(&s->gb);
7037 if(sps->crop_left || sps->crop_top){
7038 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* bottom crop limit halves for field coding */
7040 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7041 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7047 sps->crop_bottom= 0;
7050 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7051 if( sps->vui_parameters_present_flag )
7052 decode_vui_parameters(h, sps);
7054 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7055 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7056 sps_id, sps->profile_idc, sps->level_idc,
7058 sps->ref_frame_count,
7059 sps->mb_width, sps->mb_height,
7060 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7061 sps->direct_8x8_inference_flag ? "8B8" : "",
7062 sps->crop_left, sps->crop_right,
7063 sps->crop_top, sps->crop_bottom,
7064 sps->vui_parameters_present_flag ? "VUI" : "",
7065 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previously stored SPS with this id */
7068 av_free(h->sps_buffers[sps_id]);
7069 h->sps_buffers[sps_id]= sps;
/* Fill pps->chroma_qp_table[t]: for every luma qp 0..51, store the chroma qp
 * obtained by adding the chroma_qp_index_offset `index`, clipping to 0..51,
 * and mapping through the spec's chroma_qp[] table. */
build_qp_table(PPS *pps, int t, int index)
7080 for(i = 0; i < 52; i++)
7081 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set NAL unit (H.264 spec section 7.3.2.2)
 * into a freshly allocated PPS and store it in h->pps_buffers[pps_id].
 * FMO (slice_group_count > 1) is detected but not supported; the relevant
 * spec syntax is quoted verbatim in the comment below.
 * NOTE(review): several interior lines are elided in this excerpt.
 */
7084 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7085 MpegEncContext * const s = &h->s;
7086 unsigned int pps_id= get_ue_golomb(&s->gb);
7089 if(pps_id >= MAX_PPS_COUNT) {
7090 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7094 pps= av_mallocz(sizeof(PPS));
/* the referenced SPS must already have been decoded */
7097 pps->sps_id= get_ue_golomb(&s->gb);
7098 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7099 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7103 pps->cabac= get_bits1(&s->gb);
7104 pps->pic_order_present= get_bits1(&s->gb);
7105 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7106 if(pps->slice_group_count > 1 ){
7107 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7108 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7109 switch(pps->mb_slice_group_map_type){
7112 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7113 | run_length[ i ] |1 |ue(v) |
7118 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7120 | top_left_mb[ i ] |1 |ue(v) |
7121 | bottom_right_mb[ i ] |1 |ue(v) |
7129 | slice_group_change_direction_flag |1 |u(1) |
7130 | slice_group_change_rate_minus1 |1 |ue(v) |
7135 | slice_group_id_cnt_minus1 |1 |ue(v) |
7136 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7138 | slice_group_id[ i ] |1 |u(v) |
7143 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7144 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7145 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7146 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7150 pps->weighted_pred= get_bits1(&s->gb);
7151 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7152 pps->init_qp= get_se_golomb(&s->gb) + 26;
7153 pps->init_qs= get_se_golomb(&s->gb) + 26;
7154 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7155 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7156 pps->constrained_intra_pred= get_bits1(&s->gb);
7157 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7159 pps->transform_8x8_mode= 0;
7160 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7161 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7162 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* optional trailing extension fields (present iff bits remain) */
7164 if(get_bits_count(&s->gb) < bit_length){
7165 pps->transform_8x8_mode= get_bits1(&s->gb);
7166 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7167 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7169 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7172 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7173 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7174 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7175 h->pps.chroma_qp_diff= 1;
7177 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7178 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7179 pps_id, pps->sps_id,
7180 pps->cabac ? "CABAC" : "CAVLC",
7181 pps->slice_group_count,
7182 pps->ref_count[0], pps->ref_count[1],
7183 pps->weighted_pred ? "weighted" : "",
7184 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7185 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7186 pps->constrained_intra_pred ? "CONSTR" : "",
7187 pps->redundant_pic_cnt_present ? "REDU" : "",
7188 pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previously stored PPS with this id */
7192 av_free(h->pps_buffers[pps_id]);
7193 h->pps_buffers[pps_id]= pps;
7201 * Call decode_slice() for each context.
7203 * @param h h264 master context
7204 * @param context_count number of contexts to execute
/**
 * Run decode_slice() over all slice contexts.
 * Single-context case is called directly; otherwise the per-thread
 * contexts are primed and dispatched through avctx->execute(), after
 * which position/error state is pulled back into the master context.
 */
7206 static void execute_decode_slices(H264Context *h, int context_count){
7207 MpegEncContext * const s = &h->s;
7208 AVCodecContext * const avctx= s->avctx;
7212 if(context_count == 1) {
7213 decode_slice(avctx, &h);
/* context 0 is the master; prime only contexts 1..n-1 */
7215 for(i = 1; i < context_count; i++) {
7216 hx = h->thread_context[i];
7217 hx->s.error_recognition = avctx->error_recognition;
7218 hx->s.error_count = 0;
7221 avctx->execute(avctx, (void *)decode_slice,
7222 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7224 /* pull back stuff from slices to master context */
7225 hx = h->thread_context[context_count - 1];
7226 s->mb_x = hx->s.mb_x;
7227 s->mb_y = hx->s.mb_y;
7228 s->dropable = hx->s.dropable;
7229 s->picture_structure = hx->s.picture_structure;
/* accumulate per-thread error counts into the master */
7230 for(i = 1; i < context_count; i++)
7231 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code
 * delimited) framing, dispatches on nal_unit_type, and batches slice NALs
 * into thread contexts that are flushed via execute_decode_slices().
 * NOTE(review): sampled excerpt — lines are missing between the numbered
 * statements; comments describe only the visible code.
 */
7236 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7237     MpegEncContext * const s = &h->s;
7238     AVCodecContext * const avctx= s->avctx;
7240     H264Context *hx; ///< thread context
7241     int context_count = 0;
7243     h->max_contexts = avctx->thread_count;
     /* Debug hex dump of the first bytes of the buffer (visible fragment). */
7246         for(i=0; i<50; i++){
7247             av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Unless decoding in CHUNKS mode, start a fresh access unit. */
7250     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7251         h->current_slice = 0;
7252         if (!s->first_field)
7253             s->current_picture_ptr= NULL;
7265             if(buf_index >= buf_size) break;
         /* AVC framing: read the big-endian NAL size prefix byte by byte. */
7267             for(i = 0; i < h->nal_length_size; i++)
7268                 nalsize = (nalsize << 8) | buf[buf_index++];
7269             if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7274                     av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7279             // start code prefix search
7280             for(; buf_index + 3 < buf_size; buf_index++){
7281                 // This should always succeed in the first iteration.
7282                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7286             if(buf_index+3 >= buf_size) break;
         /* Decode into the next available thread context. */
7291         hx = h->thread_context[context_count];
7293         ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7294         if (ptr==NULL || dst_length < 0){
         /* Strip trailing zero bytes, then compute the exact RBSP bit length. */
7297         while(ptr[dst_length - 1] == 0 && dst_length > 0)
7299         bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7301         if(s->avctx->debug&FF_DEBUG_STARTCODE){
7302             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
         /* In AVC mode the unescaped size must match the declared NAL size. */
7305         if (h->is_avc && (nalsize != consumed)){
7306             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7310         buf_index += consumed;
         /* Skip non-reference NALs when the caller asked to hurry/skip. */
7312         if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME do not discard SEI id
7313            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
         /* Dispatch on NAL unit type (cases partly elided in this excerpt). */
7318         switch(hx->nal_unit_type){
7320             if (h->nal_unit_type != NAL_IDR_SLICE) {
7321                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7324             idr(h); //FIXME ensure we don't loose some frames if there is reordering
7326             init_get_bits(&hx->s.gb, ptr, bit_length);
7328             hx->inter_gb_ptr= &hx->s.gb;
             /* Regular slice: no data partitioning. */
7329             hx->s.data_partitioning = 0;
7331             if((err = decode_slice_header(hx, h)))
7334                 s->current_picture_ptr->key_frame |= (hx->nal_unit_type == NAL_IDR_SLICE);
             /* Queue the slice for decoding unless skip settings discard it. */
7335             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7336                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7337                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7338                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7339                && avctx->skip_frame < AVDISCARD_ALL)
             /* Data-partitioned slice, partition A: carries the slice header. */
7343             init_get_bits(&hx->s.gb, ptr, bit_length);
7345             hx->inter_gb_ptr= NULL;
7346             hx->s.data_partitioning = 1;
7348             err = decode_slice_header(hx, h);
             /* Partition B: intra-coded residual data. */
7351             init_get_bits(&hx->intra_gb, ptr, bit_length);
7352             hx->intra_gb_ptr= &hx->intra_gb;
             /* Partition C: inter-coded residual data. */
7355             init_get_bits(&hx->inter_gb, ptr, bit_length);
7356             hx->inter_gb_ptr= &hx->inter_gb;
             /* Queue only once all partitions are present and skip rules allow. */
7358             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7359                && s->context_initialized
7361                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7362                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
7363                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7364                && avctx->skip_frame < AVDISCARD_ALL)
             /* SEI NAL (presumably — surrounding case labels elided). */
7368             init_get_bits(&s->gb, ptr, bit_length);
             /* Sequence parameter set. */
7372             init_get_bits(&s->gb, ptr, bit_length);
7373             decode_seq_parameter_set(h);
7375             if(s->flags& CODEC_FLAG_LOW_DELAY)
7378             if(avctx->has_b_frames < 2)
7379                 avctx->has_b_frames= !s->low_delay;
             /* Picture parameter set. */
7382             init_get_bits(&s->gb, ptr, bit_length);
7384             decode_picture_parameter_set(h, bit_length);
7388         case NAL_END_SEQUENCE:
7389         case NAL_END_STREAM:
7390         case NAL_FILLER_DATA:
7392         case NAL_AUXILIARY_SLICE:
7395             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
         /* A full batch of slice contexts is ready: decode them now. */
7398         if(context_count == h->max_contexts) {
7399             execute_decode_slices(h, context_count);
7404             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7406             /* Slice could not be decoded in parallel mode, copy down
7407              * NAL unit stuff to context 0 and restart. Note that
7408              * rbsp_buffer is not transferred, but since we no longer
7409              * run in parallel mode this should not be an issue. */
7410             h->nal_unit_type = hx->nal_unit_type;
7411             h->nal_ref_idc   = hx->nal_ref_idc;
     /* Flush any remaining queued slice contexts at end of buffer. */
7417         execute_decode_slices(h, context_count);
7422 * returns the number of bytes consumed for building the current frame
/**
 * Clamp the parse position into a sane byte count for the caller.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the adjusted `pos` is returned — confirm against the full file.
 */
7424 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7425     if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7426     if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode entry point: consume one packet of bitstream, decode its
 * NAL units, and output at most one reordered frame via *data/*data_size.
 * Also handles end-of-stream flushing and one-time avcC extradata parsing.
 * NOTE(review): sampled excerpt — statements are missing between the numbered
 * lines; comments describe only the visible code.
 */
7431 static int decode_frame(AVCodecContext *avctx,
7432                              void *data, int *data_size,
7433                              const uint8_t *buf, int buf_size)
7435     H264Context *h = avctx->priv_data;
7436     MpegEncContext *s = &h->s;
7437     AVFrame *pict = data;
7440     s->flags= avctx->flags;
7441     s->flags2= avctx->flags2;
7443    /* end of stream, output what is still in the buffers */
7444     if (buf_size == 0) {
7448 //FIXME factorize this with the output code below
7449         out = h->delayed_pic[0];
         /* Pick the lowest-poc delayed picture still awaiting output. */
7451         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7452             if(h->delayed_pic[i]->poc < out->poc){
7453                 out = h->delayed_pic[i];
         /* Shift the delayed-picture queue down after removing `out`. */
7457         for(i=out_idx; h->delayed_pic[i]; i++)
7458             h->delayed_pic[i] = h->delayed_pic[i+1];
7461             *data_size = sizeof(AVFrame);
7462             *pict= *(AVFrame*)out;
     /* One-time parse of avcC extradata (AVCDecoderConfigurationRecord). */
7468     if(h->is_avc && !h->got_avcC) {
7469         int i, cnt, nalsize;
7470         unsigned char *p = avctx->extradata;
7471         if(avctx->extradata_size < 7) {
7472             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7476             av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7479         /* sps and pps in the avcC always have length coded with 2 bytes,
7480            so put a fake nal_length_size = 2 while parsing them */
7481         h->nal_length_size = 2;
7482         // Decode sps from avcC
7483         cnt = *(p+5) & 0x1f; // Number of sps
7485         for (i = 0; i < cnt; i++) {
7486             nalsize = AV_RB16(p) + 2;
7487             if(decode_nal_units(h, p, nalsize) < 0) {
7488                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7493         // Decode pps from avcC
7494         cnt = *(p++); // Number of pps
7495         for (i = 0; i < cnt; i++) {
7496             nalsize = AV_RB16(p) + 2;
7497             if(decode_nal_units(h, p, nalsize)  != nalsize) {
7498                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7503         // Now store right nal length size, that will be use to parse all other nals
7504         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7505         // Do not reparse avcC
     /* Annex-B extradata (if any) is fed through the normal NAL parser once. */
7509     if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7510         if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7515     buf_index=decode_nal_units(h, buf, buf_size);
7519     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7520         if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7521         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* Picture fully decoded (or non-CHUNKS mode): finish and maybe output. */
7525     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7526         Picture *out = s->current_picture_ptr;
7527         Picture *cur = s->current_picture_ptr;
7528         int i, pics, cross_idr, out_of_order, out_idx;
7532         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7533         s->current_picture_ptr->pict_type= s->pict_type;
         /* Apply memory-management control operations and roll POC state. */
7536             execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7537         h->prev_poc_msb= h->poc_msb;
7538         h->prev_poc_lsb= h->poc_lsb;
7540         h->prev_frame_num_offset= h->frame_num_offset;
7541         h->prev_frame_num= h->frame_num;
7544          * FIXME: Error handling code does not seem to support interlaced
7545          * when slices span multiple rows
7546          * The ff_er_add_slice calls don't work right for bottom
7547          * fields; they cause massive erroneous error concealing
7548          * Error marking covers both fields (top and bottom).
7549          * This causes a mismatched s->error_count
7550          * and a bad error table. Further, the error count goes to
7551          * INT_MAX when called for bottom field, because mb_y is
7552          * past end by one (callers fault) and resync_mb_y != 0
7553          * causes problems for the first MB line, too.
         /* Only one field decoded so far: wait for the second one. */
7560         if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7561             /* Wait for second field. */
7565             cur->repeat_pict = 0;
7567             /* Signal interlacing information externally. */
7568             /* Prioritize picture timing SEI information over used decoding process if it exists. */
7569             if(h->sps.pic_struct_present_flag){
7570                 switch (h->sei_pic_struct)
7572                 case SEI_PIC_STRUCT_FRAME:
7573                     cur->interlaced_frame = 0;
7575                 case SEI_PIC_STRUCT_TOP_FIELD:
7576                 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7577                 case SEI_PIC_STRUCT_TOP_BOTTOM:
7578                 case SEI_PIC_STRUCT_BOTTOM_TOP:
7579                     cur->interlaced_frame = 1;
7581                 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7582                 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7583                     // Signal the possibility of telecined film externally (pic_struct 5,6)
7584                     // From these hints, let the applications decide if they apply deinterlacing.
7585                     cur->repeat_pict = 1;
7586                     cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7588                 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7589                     // Force progressive here, as doubling interlaced frame is a bad idea.
7590                     cur->interlaced_frame = 0;
7591                     cur->repeat_pict = 2;
7593                 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7594                     cur->interlaced_frame = 0;
7595                     cur->repeat_pict = 4;
7599                 /* Derive interlacing flag from used decoding process. */
7600                 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7603             if (cur->field_poc[0] != cur->field_poc[1]){
7604                 /* Derive top_field_first from field pocs. */
7605                 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7607                 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7608                     /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7609                     if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7610                       || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7611                         cur->top_field_first = 1;
7613                         cur->top_field_first = 0;
7615                     /* Most likely progressive */
7616                     cur->top_field_first = 0;
7620     //FIXME do something with unavailable reference frames
7622         /* Sort B-frames into display order */
         /* Grow the reorder delay to what the bitstream declares. */
7624         if(h->sps.bitstream_restriction_flag
7625            && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7626             s->avctx->has_b_frames = h->sps.num_reorder_frames;
7630         if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7631            && !h->sps.bitstream_restriction_flag){
7632             s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7637         while(h->delayed_pic[pics]) pics++;
7639         assert(pics <= MAX_DELAYED_PIC_COUNT);
         /* Append current picture to the reorder queue, pinning it if needed. */
7641         h->delayed_pic[pics++] = cur;
7642         if(cur->reference == 0)
7643             cur->reference = DELAYED_PIC_REF;
7645         out = h->delayed_pic[0];
         /* Select the lowest-poc candidate for output. */
7647         for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7648             if(h->delayed_pic[i]->poc < out->poc){
7649                 out = h->delayed_pic[i];
7652         cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7654         out_of_order = !cross_idr && out->poc < h->outputed_poc;
7656         if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7658         else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7660                  ((!cross_idr && out->poc > h->outputed_poc + 2)
7661                   || cur->pict_type == FF_B_TYPE)))
             /* Heuristic: the assumed reorder depth was too small — grow it. */
7664             s->avctx->has_b_frames++;
7667         if(out_of_order || pics > s->avctx->has_b_frames){
             /* Release the output picture from the delay queue. */
7668             out->reference &= ~DELAYED_PIC_REF;
7669             for(i=out_idx; h->delayed_pic[i]; i++)
7670                 h->delayed_pic[i] = h->delayed_pic[i+1];
7672         if(!out_of_order && pics > s->avctx->has_b_frames){
7673             *data_size = sizeof(AVFrame);
7675                 h->outputed_poc = out->poc;
7676             *pict= *(AVFrame*)out;
7678             av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7683     assert(pict->data[0] || !*data_size);
7684     ff_print_debug_info(s, pict);
7685 //printf("out %d\n", (int)pict->data[0]);
7688     /* Return the Picture timestamp as the frame number */
7689     /* we subtract 1 because it is added on utils.c     */
7690     avctx->frame_number = s->picture_number - 1;
7692     return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbor availability for the current macroblock:
 * a neighbor counts as available only if it lies inside the picture and
 * belongs to the same slice (slice_table match).
 * Index layout visible here: 0=top-left, 1=top, 2=top-right, 3=left;
 * 4/5 are constants flagged FIXME to hoist out.
 */
7695 static inline void fill_mb_avail(H264Context *h){
7696     MpegEncContext * const s = &h->s;
7697     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     /* Top row of neighbors (guarded by s->mb_y in elided lines, presumably). */
7700         h->mb_avail[0]= s->mb_x                  && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7701         h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
7702         h->mb_avail[2]= s->mb_x+1 < s->mb_width  && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7708     h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7709     h->mb_avail[4]= 1; //FIXME move out
7710     h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test code (in upstream h264.c this lives under #ifdef TEST inside
 * main()). It exercises: unsigned/signed Exp-Golomb encode/decode round-trips,
 * the 4x4 (I)DCT with a quantization approximation, the quantizer over all QP
 * values, and NAL escaping/unescaping round-trips.
 * NOTE(review): sampled excerpt — the enclosing function header and many
 * statements are elided; comments describe only the visible code.
 */
7718 #define SIZE (COUNT*40)
7724 //    int int_temp[10000];
7726     AVCodecContext avctx;
7728     dsputil_init(&dsp, &avctx);
7730     init_put_bits(&pb, temp, SIZE);
7731     printf("testing unsigned exp golomb\n");
    /* Write COUNT unsigned Exp-Golomb codes... */
7732     for(i=0; i<COUNT; i++){
7734         set_ue_golomb(&pb, i);
7735         STOP_TIMER("set_ue_golomb");
7737     flush_put_bits(&pb);
    /* ...then read them back and verify each decoded value. */
7739     init_get_bits(&gb, temp, 8*SIZE);
7740     for(i=0; i<COUNT; i++){
7743         s= show_bits(&gb, 24);
7746         j= get_ue_golomb(&gb);
7748             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7751         STOP_TIMER("get_ue_golomb");
    /* Same round-trip for signed Exp-Golomb, centered around zero. */
7755     init_put_bits(&pb, temp, SIZE);
7756     printf("testing signed exp golomb\n");
7757     for(i=0; i<COUNT; i++){
7759         set_se_golomb(&pb, i - COUNT/2);
7760         STOP_TIMER("set_se_golomb");
7762     flush_put_bits(&pb);
7764     init_get_bits(&gb, temp, 8*SIZE);
7765     for(i=0; i<COUNT; i++){
7768         s= show_bits(&gb, 24);
7771         j= get_se_golomb(&gb);
7772         if(j != i - COUNT/2){
7773             printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7776         STOP_TIMER("get_se_golomb");
7780     printf("testing 4x4 (I)DCT\n");
7783     uint8_t src[16], ref[16];
7784     uint64_t error= 0, max_error=0;
    /* DCT/IDCT round-trip on random 4x4 blocks, accumulating the error. */
7786     for(i=0; i<COUNT; i++){
7788 //        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7789         for(j=0; j<16; j++){
7790             ref[j]= random()%255;
7791             src[j]= random()%255;
7794         h264_diff_dct_c(block, src, ref, 4);
        /* Crude quantization approximation applied to the coefficients. */
7797         for(j=0; j<16; j++){
7798 //            printf("%d ", block[j]);
7799             block[j]= block[j]*4;
7800             if(j&1) block[j]= (block[j]*4 + 2)/5;
7801             if(j&4) block[j]= (block[j]*4 + 2)/5;
7805         s->dsp.h264_idct_add(ref, block, 4);
7806 /*        for(j=0; j<16; j++){
7807             printf("%d ", ref[j]);
        /* Compare reconstruction against the source, track worst-case error. */
7811         for(j=0; j<16; j++){
7812             int diff= FFABS(src[j] - ref[j]);
7815             max_error= FFMAX(max_error, diff);
7818     printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7819     printf("testing quantizer\n");
7820     for(qp=0; qp<52; qp++){
7822             src1_block[i]= src2_block[i]= random()%255;
7825     printf("Testing NAL layer\n");
7827     uint8_t bitstream[COUNT];
7828     uint8_t nal[COUNT*2];
7830     memset(&h, 0, sizeof(H264Context));
7832     for(i=0; i<COUNT; i++){
        /* Random non-zero payload... */
7840         for(j=0; j<COUNT; j++){
7841             bitstream[j]= (random() % 255) + 1;
        /* ...with some zero bytes injected to exercise escaping. */
7844         for(j=0; j<zeros; j++){
7845             int pos= random() % COUNT;
7846             while(bitstream[pos] == 0){
        /* Encode to NAL and decode back; verify length, consumption, content. */
7855         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7857             printf("encoding failed\n");
7861         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7865         if(out_length != COUNT){
7866             printf("incorrect length %d %d\n", out_length, COUNT);
7870         if(consumed != nal_length){
7871             printf("incorrect consumed length %d %d\n", nal_length, consumed);
7875         if(memcmp(bitstream, out, COUNT)){
7876             printf("mismatch\n");
7882     printf("Testing RBSP\n");
/**
 * Decoder close callback: release per-context allocations.
 * Frees both RBSP scratch buffers, the lookup tables, and every cached
 * SPS/PPS. (Return statement elided in this excerpt.)
 */
7890 static av_cold int decode_end(AVCodecContext *avctx)
7892     H264Context *h = avctx->priv_data;
7893     MpegEncContext *s = &h->s;
7896     av_freep(&h->rbsp_buffer[0]);
7897     av_freep(&h->rbsp_buffer[1]);
7898     free_tables(h); //FIXME cleanup init stuff perhaps
    /* av_freep NULLs each slot, so the arrays are left in a safe state. */
7900     for(i = 0; i < MAX_SPS_COUNT; i++)
7901         av_freep(h->sps_buffers + i);
7903     for(i = 0; i < MAX_PPS_COUNT; i++)
7904         av_freep(h->pps_buffers + i);
7908 //    memset(h, 0, sizeof(H264Context));
7914 AVCodec h264_decoder = {
7918 sizeof(H264Context),
7923 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7925 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),