2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4

/* CAVLC VLC tables (see H.264 spec clause 9.2). The *_tables arrays are the
 * static storage backing the VLC structs, sized per the init sizes below. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;

/* forward declarations for functions defined later in this file */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into a 32-bit word so that the result matches
 * the in-memory layout of two consecutive int16_t on the host endianness.
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
    /* little-endian path */
    return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 for the H.264 QP range 0..51 (avoids a runtime modulo) */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,

/* div6[q] == q / 6 for the H.264 QP range 0..51 (avoids a runtime division) */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,

/* left-neighbour 4x4 block index remappings; which row is selected depends
 * on the MBAFF frame/field relationship between current and left MB */
static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock neighbour caches of the context: intra 4x4
 * prediction modes, non-zero coefficient counts, CBP, motion vectors,
 * reference indices, mvd and B-direct flags, applying the MBAFF
 * frame/field neighbour-selection rules.
 * @param mb_type     macroblock type of the current MB
 * @param for_deblock nonzero when filling caches for the loop filter
 *                    (neighbours across slice boundaries are then usable)
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;

    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* default (non-MBAFF) neighbour coordinates */
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
        /* MBAFF: neighbours are picked per MB-pair, depending on whether
         * each pair is coded frame or field */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            top_xy -= s->mb_stride;
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
            topleft_xy -= s->mb_stride;
        } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
            topright_xy -= s->mb_stride;
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                    left_block = left_block_options[1];
                    left_block= left_block_options[2];
                    left_xy[1] += s->mb_stride;
                    left_block = left_block_options[3];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

        /* for_deblock: any decoded MB is a valid neighbour (slice_table
         * entries below 0xFFFF mark decoded MBs) */
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;

        /* normal decode path: only neighbours in the same slice are usable */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* sample-availability bitmasks for the intra predictors; with
         * constrained_intra_pred only intra neighbours count as available */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        /* intra 4x4 prediction-mode cache from top and left neighbours */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!(top_type & type_mask))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!(left_type[i] & type_mask))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    /* non-zero coefficient count cache */
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

        /* no usable top neighbour: CABAC treats missing nnz as 0, CAVLC as 64 */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

            /* CBP of top/left neighbours (used by CABAC context modelling) */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {

            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {

            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;

            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    /* motion-vector / reference caches for inter and direct MBs */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* topleft/topright are only needed for spatial MV prediction */
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                /* B-direct flag cache from neighbours */
                if(h->slice_type_nos == FF_B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;

                    /* MBAFF frame<->field neighbour scaling: MAP_F2F is
                     * redefined below for each direction, then applied to
                     * every neighbour cache position */
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

                    /* frame MB with field neighbour: halve vertical mv, double ref */
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\

                    /* field MB with frame neighbour: double vertical mv, halve ref */
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the cached intra 4x4 prediction modes of the current MB back to
 * the frame-wide intra4x4_pred_mode array (right column and bottom row,
 * which later MBs read as their left/top neighbours).
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    const int mb_xy= h->mb_xy;

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Checks if the top & left blocks are available if needed and changes the
 * dc mode so it only uses the available blocks; returns a negative value
 * (via the lookup yielding -1) on an invalid mode for the available
 * neighbours.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* mode remap tables: -1 marks modes invalid without that neighbour */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    if(!(h->top_samples_available&0x8000)){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
            if(!(h->left_samples_available&mask[i])){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;

} //FIXME cleanup like next
/**
 * Checks if the top & left blocks are available if needed and changes the
 * dc mode so it only uses the available blocks (16x16 / chroma variant).
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if((h->left_samples_available&0x8080) != 0x8080){
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * Gets the predicted intra4x4 prediction mode: the minimum of the left and
 * top neighbour modes, falling back to DC when a neighbour is unavailable
 * (encoded as a negative cache value).
 */
static inline int pred_intra_mode(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
    const int min= FFMIN(left, top);

    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);

    if(min<0) return DC_PRED;
/**
 * Writes the cached non-zero coefficient counts of the current MB back to
 * the frame-wide non_zero_count array (right/bottom edges read by later MBs).
 */
static inline void write_back_non_zero_count(H264Context *h){
    const int mb_xy= h->mb_xy;

    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];

    /* chroma U */
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];

    /* chroma V */
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/**
 * Gets the predicted number of non-zero coefficients.
 * @param n block index
 */
static inline int pred_non_zero_count(H264Context *h, int n){
    const int index8= scan8[n];
    const int left= h->non_zero_count_cache[index8 - 1];
    const int top = h->non_zero_count_cache[index8 - 8];

    /* average when both neighbours are available (values >= 64 flag
     * unavailable neighbours) */
    if(i<64) i= (i+1)>>1;

    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the motion vector of the top-right (diagonal) neighbour block
 * into *C and returns its reference index, falling back to the top-left
 * neighbour when the top-right is unavailable. Handles MBAFF frame/field
 * mismatches by rescaling neighbour MVs/refs via SET_DIAG_MV.
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

        /* there is no consistent mapping of mvs to neighboring locations that will
         * make mbaff happy, so we can't move all this logic to fill_caches */
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
            *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
            *C = h->mv_cache[list][scan8[0]-2];

           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
                /* rescales the neighbour MV/ref for a frame/field mismatch and
                 * returns from the enclosing function */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];

        tprintf(s->avctx, "topright MV not available\n");
        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * Gets the predicted MV (median of left, top and diagonal neighbours per
 * the H.264 MV prediction rules, with single-match and fallback cases).
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        }else if(top_ref==ref){
        /* only the left neighbour is available: take it directly */
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * Gets the directionally predicted 16x8 MV: the top (for the upper
 * partition) or left (for the lower partition) neighbour is used directly
 * when it has the same reference, otherwise falls back to pred_motion().
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
/**
 * Gets the directionally predicted 8x16 MV: the left (for the left
 * partition) or diagonal (for the right partition) neighbour is used
 * directly when it has the same reference, otherwise pred_motion().
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){

    //RARE
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the MV for a P-skip macroblock: (0,0) when a neighbour is
 * unavailable or is a zero-MV ref-0 block, otherwise the normal
 * median prediction via pred_motion().
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){

    pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct distance scale factor for reference i of
 * list 0, per H.264 clause 8.4.1.2.3 (tb/td POC-distance ratio, clipped).
 */
static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
    int poc0 = h->ref_list[0][i].poc;
    int td = av_clip(poc1 - poc0, -128, 127);
    if(td == 0 || h->ref_list[0][i].long_ref){
        int tb = av_clip(poc - poc0, -128, 127);
        int tx = (16384 + (FFABS(td) >> 1)) / td;
        return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills h->dist_scale_factor (and the per-field table when needed) with
 * temporal-direct scale factors for every list-0 reference.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
        /* per-field factors for MBAFF field MBs */
        for(field=0; field<2; field++){
            const int poc  = h->s.current_picture_ptr->field_poc[field];
            const int poc1 = h->ref_list[1][0].field_poc[field];
            for(i=0; i < 2*h->ref_count[0]; i++)
                h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the mapping from the co-located picture's list-0 reference
 * indices to the current slice's list-0 indices (used by temporal direct
 * mode). Matching is done on POC (frame_num*4 + reference parity).
 * @param field    current field parity (when mbafi)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero for the MBAFF field-pair variant (indices 16..)
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isn't needed
                poc= (poc&~3) + rfield + 1;

            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                        map[list][old_ref] = cur_ref;
/**
 * Initializes per-picture reference bookkeeping needed for B-frame
 * direct prediction: stores the current slice's ref counts and encoded
 * ref POCs (4*frame_num + (reference&3)) into the current Picture,
 * mirrors them for frame pictures, records the MBAFF flag, and (for
 * temporal direct) builds the col-to-list0 maps via fill_colmap().
 * NOTE(review): lines 963, 966, 971-972, 976-982 are elided — variable
 * declarations and the early-return after the spatial-pred check are
 * not visible.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
// sidx selects the field half of the tables: 0 for top/frame, 1 for bottom
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
973 if(s->picture_structure == PICT_FRAME){
// frame pictures: both field halves share the same counts/POCs
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
// spatial direct (or non-B) needs no col-to-list0 maps
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B macroblock (or
 * its 8x8 partitions) coded in direct mode, writing the results into
 * h->mv_cache / h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Two branches: spatial direct (h->direct_spatial_mv_pred) and temporal
 * direct (scaling the colocated list1 picture's MVs by the distance
 * scale factor). Handles frame/field (PAFF/MBAFF) mismatches between
 * the current MB and the colocated MB.
 * NOTE(review): this extract is heavily elided (many source lines
 * missing, e.g. 995, 1000-1001, 1026, 1034-1035, 1048-1051, 1056-1057,
 * 1062-1064, 1066-1069, 1080-1098, ...) — declarations of ref[], mv[],
 * mb_type_col[], loop variables, ref_offset and many braces/else arms
 * are not visible here; read against the full source.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// -- locate the colocated MB, correcting for frame/field coding mismatch --
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
// pick the colocated field whose POC is closest to the current picture
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// a field MB covers a 16x32 frame area: use the MB pair
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
// choose the direct partition size from the colocated MB's type
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// -- pointers into the colocated picture's MV / ref-index planes --
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// NOTE(review): the condition guarding this offset (lines 1056-1057) is
// elided — presumably the bottom-field / second-MB-of-pair case.
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
// ================= spatial direct prediction =================
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare makes PART_NOT_AVAILABLE/-1 sort above valid refs
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1084 if(ref[0] < 0 && ref[1] < 0){
// no usable neighbor refs: zero MV against ref 0 in both lists
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093 mv[list][0] = mv[list][1] = 0;
// drop the list with no valid reference
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
// spatial direct with frame/field mismatch: per-8x8 colocated test
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// colocated block "moving slowly" against ref 0 => force zero MV
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
// x264_build check works around an old x264 colocated-MV bug
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// spatial direct, per-8x8 (and possibly per-4x4) refinement
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ================= temporal direct prediction =================
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
// field MB in an MBAFF frame: use the per-field maps/factors
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
// y MVs need rescaling between field and frame coordinates
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
// intra colocated block: direct MV is zero against ref 0
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
// mv = (DistScaleFactor * mv_col + 128) >> 8 (spec 8.4.1.2.3)
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
// list1 MV is the residual: scaled MV minus the colocated MV
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (h->mv_cache,
 * h->ref_cache, h->mvd_cache, h->sub_mb_type) back into the picture-wide
 * arrays (current_picture.motion_val / ref_index, h->mvd_table,
 * h->direct_table) at this MB's position.
 * NOTE(review): lines 1326-1327, 1330, 1334-1336, 1343-1350 are elided
 * (loop headers, the list-not-used fast path, closing braces).
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
// b_xy: 4x4-block coords; b8_xy: 8x8-block coords of this MB
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
// copy 4 MVs (2 x uint64) per row from the cache into the picture
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one ref index per 8x8 partition
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// CABAC B-slices track per-8x8 direct flags for context modelling
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Unescapes a NAL unit: parses the one-byte header (nal_ref_idc,
 * nal_unit_type) and removes the 0x000003 emulation-prevention bytes
 * (spec 7.4.1). Returns src+1 directly when no escape byte is present;
 * otherwise decodes into h->rbsp_buffer.
 * NOTE(review): lines 1377-1380, 1384-1386, 1389, 1394-1401, 1405-1407,
 * 1411-1415, 1417-1418, 1422-1429, 1431-1432, 1434-1435 are elided
 * (declarations, debug #ifdef, loop bodies, return statements).
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
// scan two bytes at a time for a 00 00 0x (x<=3) escape sequence
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
// fast path: no escape bytes, the input can be used in place
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
// DPC partitions get their own buffer so they don't clobber DPA/DPB data
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
// zero the padding so downstream bitstream readers never read garbage
1433 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1436 *consumed= si + 1;//+1 for the header
1437 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1442 * identifies the exact end of the bitstream
1443 * @return the length of the trailing bits, or 0 if damaged
/**
 * Identifies the exact end of the bitstream by examining the RBSP
 * trailing bits of the last byte.
 * NOTE(review): the body (lines 1446-1448, 1450+) is elided in this
 * extract; only the signature and the trace line are visible.
 */
1445 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1449 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1459 * IDCT transforms the 16 dc values and dequantizes them.
1460 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform of the 16 luma DC coefficients,
 * followed by dequantization ((x*qmul + 128) >> 8). The DC values live
 * at positions {0,1,4,5} x {0,2,8,10} * stride within the 16x16 block
 * array (see x_offset/y_offset).
 * NOTE(review): lines 1463-1464, 1468, 1470-1471, 1477-1484, 1490,
 * 1495+ are elided (stride/loop declarations, the vertical-pass stores,
 * closing braces).
 */
1462 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1465 int temp[16]; //FIXME check if this is a good idea
1466 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1467 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1469 //memset(block, 64, 2*256);
// horizontal butterfly pass (results go to temp[], stores elided here)
1472 const int offset= y_offset[i];
1473 const int z0= block[offset+stride*0] + block[offset+stride*4];
1474 const int z1= block[offset+stride*0] - block[offset+stride*4];
1475 const int z2= block[offset+stride*1] - block[offset+stride*5];
1476 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical butterfly pass over temp[], with dequant on store
1485 const int offset= x_offset[i];
1486 const int z0= temp[4*0+i] + temp[4*2+i];
1487 const int z1= temp[4*0+i] - temp[4*2+i];
1488 const int z2= temp[4*1+i] - temp[4*3+i];
1489 const int z3= temp[4*1+i] + temp[4*3+i];
1491 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1492 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1493 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1494 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1500 * DCT transforms the 16 dc values.
1501 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c), with a >>1 scaling on
 * the final stores. Same DC layout within the block array.
 * NOTE(review): lines 1505, 1509-1510, 1516-1523, 1529, 1534+ are
 * elided (stride/loop declarations, horizontal-pass stores, braces).
 */
1503 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1504 // const int qmul= dequant_coeff[qp][0];
1506 int temp[16]; //FIXME check if this is a good idea
1507 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1508 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// horizontal butterfly pass (stores to temp[] elided in this extract)
1511 const int offset= y_offset[i];
1512 const int z0= block[offset+stride*0] + block[offset+stride*4];
1513 const int z1= block[offset+stride*0] - block[offset+stride*4];
1514 const int z2= block[offset+stride*1] - block[offset+stride*5];
1515 const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical butterfly pass, halved on store
1524 const int offset= x_offset[i];
1525 const int z0= temp[4*0+i] + temp[4*2+i];
1526 const int z1= temp[4*0+i] - temp[4*2+i];
1527 const int z2= temp[4*1+i] - temp[4*3+i];
1528 const int z3= temp[4*1+i] + temp[4*3+i];
1530 block[stride*0 +offset]= (z0 + z3)>>1;
1531 block[stride*2 +offset]= (z1 + z2)>>1;
1532 block[stride*8 +offset]= (z1 - z2)>>1;
1533 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the 4 chroma DC
 * coefficients (a,b,c,d laid out on a 2x2 grid with the given strides).
 * NOTE(review): lines 1544-1545 and 1550-1555 are elided — the
 * declarations of a..d and the intermediate 'e' computation (presumably
 * the butterfly sums) are not visible.
 */
1541 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1542 const int stride= 16*2;
1543 const int xStride= 16;
1546 a= block[stride*0 + xStride*0];
1547 b= block[stride*0 + xStride*1];
1548 c= block[stride*1 + xStride*0];
1549 d= block[stride*1 + xStride*1];
// butterfly outputs, dequantized with (x*qmul) >> 7
1556 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1557 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1558 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1559 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder
 * counterpart of chroma_dc_dequant_idct_c); no scaling on store.
 * NOTE(review): lines 1566-1567 and 1572-1577 are elided — declarations
 * of a..d and the intermediate 'e' are not visible.
 */
1563 static void chroma_dc_dct_c(DCTELEM *block){
1564 const int stride= 16*2;
1565 const int xStride= 16;
1568 a= block[stride*0 + xStride*0];
1569 b= block[stride*0 + xStride*1];
1570 c= block[stride*1 + xStride*0];
1571 d= block[stride*1 + xStride*1];
1578 block[stride*0 + xStride*0]= (a+c);
1579 block[stride*0 + xStride*1]= (e+b);
1580 block[stride*1 + xStride*0]= (a-c);
1581 block[stride*1 + xStride*1]= (e-b);
1586 * gets the chroma qp.
/**
 * Looks up the chroma QP for a luma qscale in the PPS-derived table.
 * @param t table selector — presumably 0 for Cb, 1 for Cr (the PPS has
 *          separate chroma QP offsets); confirm against H264Context.
 */
1588 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1589 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one prediction direction of one partition:
 * computes the quarter-pel luma (and eighth-pel chroma) source position
 * from mv_cache, falls back to ff_emulated_edge_mc() when the reference
 * block extends outside the picture, then runs the qpel and chroma MC
 * functions into dest_y/dest_cb/dest_cr.
 * NOTE(review): lines 1604, 1609, 1612, 1619-1621, 1623, 1625-1626,
 * 1628-1629, 1633, 1636-1637, 1640, 1642-1643, 1646, 1648+ are elided
 * ('emu' declaration, several if() headers and braces).
 */
1592 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1593 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1594 int src_x_offset, int src_y_offset,
1595 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1596 MpegEncContext * const s = &h->s;
// mv in quarter-pel units; offsets are in 8-pel (i.e. *8) units
1597 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1598 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1599 const int luma_xy= (mx&3) + ((my&3)<<2);
1600 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1601 uint8_t * src_cb, * src_cr;
1602 int extra_width= h->emu_edge_width;
1603 int extra_height= h->emu_edge_height;
1605 const int full_mx= mx>>2;
1606 const int full_my= my>>2;
1607 const int pic_width = 16*s->mb_width;
1608 const int pic_height = 16*s->mb_height >> MB_FIELD;
// fractional positions need the 6-tap filter's 2+3 pel margin
1610 if(mx&7) extra_width -= 3;
1611 if(my&7) extra_height -= 3;
1613 if( full_mx < 0-extra_width
1614 || full_my < 0-extra_height
1615 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1616 || full_my + 16/*FIXME*/ > pic_height + extra_height){
// out of picture: copy a padded block into the edge buffer first
1617 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1618 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1622 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1624 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1627 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1630 // chroma offset when predicting from a field of opposite parity
1631 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1632 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1634 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1635 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1639 src_cb= s->edge_emu_buffer;
1641 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1644 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1645 src_cr= s->edge_emu_buffer;
1647 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: runs mc_dir_part()
 * for list0 with the "put" functions and — if list1 is also used —
 * again for list1 with the "avg" functions, averaging into the same
 * destination.
 * NOTE(review): lines 1659, 1665-1666, 1671-1672, 1674-1676, 1681+ are
 * elided (the if(list0)/if(list1) braces and the qpix_op= qpix_avg
 * switch-over for luma).
 */
1650 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1651 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1652 int x_offset, int y_offset,
1653 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1654 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1655 int list0, int list1){
1656 MpegEncContext * const s = &h->s;
1657 qpel_mc_func *qpix_op= qpix_put;
1658 h264_chroma_mc_func chroma_op= chroma_put;
// advance dest pointers to this partition; x/y offsets are chroma-scale
1660 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1661 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1662 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1663 x_offset += 8*s->mb_x;
1664 y_offset += 8*(s->mb_y >> MB_FIELD);
1667 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1668 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1669 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1670 qpix_op, chroma_op);
// second direction averages on top of the first
1673 chroma_op= chroma_avg;
1677 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition. Bi-directional case:
 * predicts list0 into dest and list1 into the obmc scratchpad, then
 * blends them with implicit (use_weight==2) or explicit bi-weights.
 * Uni-directional case: predicts then applies the explicit weight/offset
 * in place.
 * NOTE(review): lines 1692, 1698-1699, 1706-1707, 1714, 1721, 1731-1732,
 * 1739, 1747-1750 are elided (the if(list0 && list1)/else braces and
 * weight-denominator adjustments, if any).
 */
1684 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1685 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1686 int x_offset, int y_offset,
1687 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1688 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1689 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1690 int list0, int list1){
1691 MpegEncContext * const s = &h->s;
1693 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1694 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1695 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1696 x_offset += 8*s->mb_x;
1697 y_offset += 8*(s->mb_y >> MB_FIELD);
1700 /* don't optimize for luma-only case, since B-frames usually
1701 * use implicit weights => chroma too. */
1702 uint8_t *tmp_cb = s->obmc_scratchpad;
1703 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1704 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1705 int refn0 = h->ref_cache[0][ scan8[n] ];
1706 int refn1 = h->ref_cache[1][ scan8[n] ];
// list0 -> dest, list1 -> scratchpad; blended below
1708 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1709 dest_y, dest_cb, dest_cr,
1710 x_offset, y_offset, qpix_put, chroma_put);
1711 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1712 tmp_y, tmp_cb, tmp_cr,
1713 x_offset, y_offset, qpix_put, chroma_put);
1715 if(h->use_weight == 2){
// implicit weighting: weights sum to 64, log2 denom 5, offset 0
1716 int weight0 = h->implicit_weight[refn0][refn1];
1717 int weight1 = 64 - weight0;
1718 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1719 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1722 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1723 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1724 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1725 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1726 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1727 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1729 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1730 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: predict then weight in place
1733 int list = list1 ? 1 : 0;
1734 int refn = h->ref_cache[list][ scan8[n] ];
1735 Picture *ref= &h->ref_list[list][refn];
1736 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1737 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1738 qpix_put, chroma_put);
1740 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1742 if(h->use_weight_chroma){
1743 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1745 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or unweighted motion
 * compensation: weighted when explicit weights are enabled
 * (use_weight==1), or when implicit weighting (use_weight==2) is active
 * and the bi-prediction weights are not the trivial 32/32 split.
 */
1751 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1757 int list0, int list1){
// implicit weight 32 means a plain average — take the std path
1758 if((h->use_weight==2 && list0 && list1
1759 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1760 || h->use_weight==1)
// [0]=16x16-size op, [3]=8x8-size op (luma/chroma pairing)
1761 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1762 x_offset, y_offset, qpix_put, chroma_put,
1763 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1765 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1766 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Prefetches reference pixels for the estimated motion of upcoming
 * macroblocks into the cache (luma plus both chroma planes).
 * NOTE(review): line 1774 is elided — presumably the guard skipping
 * prefetch when the ref is unavailable; confirm in full source.
 */
1769 static inline void prefetch_motion(H264Context *h, int list){
1770 /* fetch pixels for estimated mv 4 macroblocks ahead
1771 * optimized for 64byte cache lines */
1772 MpegEncContext * const s = &h->s;
1773 const int refn = h->ref_cache[list][scan8[0]];
// full-pel position of this MB's MV, shifted ahead by 4 MBs (+64)
1775 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1776 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1777 uint8_t **src= h->ref_list[list][refn].data;
1778 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1779 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// cb and cr are assumed adjacent: stride src[2]-src[1] covers both
1780 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1781 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for a whole macroblock: dispatches to
 * mc_part() per partition according to mb_type (16x16, 16x8, 8x16, or
 * per-8x8 sub_mb_type: 8x8, 8x4, 4x8, 4x4), with the matching qpel/
 * chroma function sizes and weight-op indices. Also prefetches both
 * reference lists.
 * NOTE(review): lines 1792, 1794, 1796, 1820-1822, 1824-1825, 1827,
 * 1830, 1854-1855, 1857, 1864-1868, 1870 are elided (loop headers over
 * the 4 8x8 blocks / 4 4x4 sub-blocks, n = 4*i, closing braces).
 */
1785 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1786 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1787 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1788 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1789 MpegEncContext * const s = &h->s;
1790 const int mb_xy= h->mb_xy;
1791 const int mb_type= s->current_picture.mb_type[mb_xy];
1793 assert(IS_INTER(mb_type));
1795 prefetch_motion(h, 0);
1797 if(IS_16X16(mb_type)){
1798 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1799 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1800 &weight_op[0], &weight_avg[0],
1801 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1802 }else if(IS_16X8(mb_type)){
// top and bottom 16x8 halves, each with its own direction flags
1803 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1804 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1805 &weight_op[1], &weight_avg[1],
1806 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1807 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1808 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1809 &weight_op[1], &weight_avg[1],
1810 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1811 }else if(IS_8X16(mb_type)){
// left and right 8x16 halves; delta is the byte offset between rows
1812 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1813 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1814 &weight_op[2], &weight_avg[2],
1815 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1816 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1817 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1818 &weight_op[2], &weight_avg[2],
1819 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1823 assert(IS_8X8(mb_type));
// 8x8 mode: each 8x8 block has its own sub partitioning
1826 const int sub_mb_type= h->sub_mb_type[i];
1828 int x_offset= (i&1)<<2;
1829 int y_offset= (i&2)<<1;
1831 if(IS_SUB_8X8(sub_mb_type)){
1832 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1834 &weight_op[3], &weight_avg[3],
1835 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1836 }else if(IS_SUB_8X4(sub_mb_type)){
1837 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1838 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1839 &weight_op[4], &weight_avg[4],
1840 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1841 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1842 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1843 &weight_op[4], &weight_avg[4],
1844 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1845 }else if(IS_SUB_4X8(sub_mb_type)){
1846 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1847 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1848 &weight_op[5], &weight_avg[5],
1849 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1850 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1851 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1852 &weight_op[5], &weight_avg[5],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856 assert(IS_SUB_4X4(sub_mb_type));
1858 int sub_x_offset= x_offset + 2*(j&1);
1859 int sub_y_offset= y_offset + (j&2);
1860 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[6], &weight_avg[6],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1869 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, chroma DC
 * coeff_token, total_zeros, chroma DC total_zeros, run, run7), pointing
 * each VLC at its statically allocated table and building it with
 * INIT_VLC_USE_NEW_STATIC.
 * NOTE(review): lines 1874-1879, 1886-1888, 1896-1897, 1901, 1903-1904,
 * 1912, 1921-1923, 1927, 1931, 1938-1940 are elided (the done-guard
 * body, offset/i declarations, loop headers, RUN_VLC_BITS argument,
 * closing braces).
 */
1872 static av_cold void decode_init_vlc(void){
1873 static int done = 0;
1880 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1881 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1882 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1883 &chroma_dc_coeff_token_len [0], 1, 1,
1884 &chroma_dc_coeff_token_bits[0], 1, 1,
1885 INIT_VLC_USE_NEW_STATIC);
// the four coeff_token VLCs are packed into one static table; each
// gets its slice at 'offset'
1889 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1890 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1891 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1892 &coeff_token_len [i][0], 1, 1,
1893 &coeff_token_bits[i][0], 1, 1,
1894 INIT_VLC_USE_NEW_STATIC);
1895 offset += coeff_token_vlc_tables_size[i];
1898 * This is a one time safety check to make sure that
1899 * the packed static coeff_token_vlc table sizes
1900 * were initialized correctly.
1902 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1905 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1906 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1907 init_vlc(&chroma_dc_total_zeros_vlc[i],
1908 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1909 &chroma_dc_total_zeros_len [i][0], 1, 1,
1910 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1911 INIT_VLC_USE_NEW_STATIC);
1913 for(i=0; i<15; i++){
1914 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1915 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1916 init_vlc(&total_zeros_vlc[i],
1917 TOTAL_ZEROS_VLC_BITS, 16,
1918 &total_zeros_len [i][0], 1, 1,
1919 &total_zeros_bits[i][0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
1924 run_vlc[i].table = run_vlc_tables[i];
1925 run_vlc[i].table_allocated = run_vlc_tables_size;
1926 init_vlc(&run_vlc[i],
1928 &run_len [i][0], 1, 1,
1929 &run_bits[i][0], 1, 1,
1930 INIT_VLC_USE_NEW_STATIC);
// run7 handles zeros_left > 6; note the comma operator on this line
1932 run7_vlc.table = run7_vlc_table,
1933 run7_vlc.table_allocated = run7_vlc_table_size;
1934 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1935 &run_len [6][0], 1, 1,
1936 &run_bits[6][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
/* Free all per-decoder tables allocated by alloc_tables()/context_init(),
 * including the per-thread-context top borders and scratchpads.
 * av_freep() NULLs each pointer, so a later re-alloc is safe. */
1941 static void free_tables(H264Context *h){
1944 av_freep(&h->intra4x4_pred_mode);
1945 av_freep(&h->chroma_pred_mode_table);
1946 av_freep(&h->cbp_table);
1947 av_freep(&h->mvd_table[0]);
1948 av_freep(&h->mvd_table[1]);
1949 av_freep(&h->direct_table);
1950 av_freep(&h->non_zero_count);
1951 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear it too so no dangling
 * alias survives the free above. */
1952 h->slice_table= NULL;
1954 av_freep(&h->mb2b_xy);
1955 av_freep(&h->mb2b8_xy);
/* per-thread contexts own their own border/scratch buffers. */
1957 for(i = 0; i < h->s.avctx->thread_count; i++) {
1958 hx = h->thread_context[i];
1960 av_freep(&hx->top_borders[1]);
1961 av_freep(&hx->top_borders[0]);
1962 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per qp 0..51) from the PPS
 * scaling matrices. If both 8x8 scaling matrices are identical, the two
 * coeff pointers share one buffer instead of duplicating the work. */
1966 static void init_dequant8_coeff_table(H264Context *h){
/* table layout depends on whether the platform IDCT expects transposed
 * coefficients (non-C idct8 implementations). */
1968 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1969 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1970 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1972 for(i=0; i<2; i++ ){
1973 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
/* matrices equal: alias buffer 0 and skip recomputation. */
1974 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1978 for(q=0; q<52; q++){
/* qp/6 gives the left-shift; qp%6 (elided here) selects the base row. */
1979 int shift = div6[q];
1982 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1983 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1984 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the six 4x4 dequantization tables (Intra/Inter x Y/Cb/Cr) from the
 * PPS scaling matrices, sharing buffers between lists whose matrices match. */
1989 static void init_dequant4_coeff_table(H264Context *h){
/* see init_dequant8_coeff_table(): layout follows the platform IDCT. */
1991 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1992 for(i=0; i<6; i++ ){
1993 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* if an earlier list j has an identical matrix, alias its buffer. */
1995 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1996 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2003 for(q=0; q<52; q++){
/* +2 keeps extra precision for the 4x4 transform scaling. */
2004 int shift = div6[q] + 2;
2007 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2008 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2009 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS. For lossless
 * (transform_bypass) streams, qp==0 entries are forced to the identity
 * scale 1<<6 so dequantization becomes a no-op at that qp. */
2014 static void init_dequant_tables(H264Context *h){
2016 init_dequant4_coeff_table(h);
2017 if(h->pps.transform_8x8_mode)
2018 init_dequant8_coeff_table(h);
2019 if(h->sps.transform_bypass){
2022 h->dequant4_coeff[i][0][x] = 1<<6;
2023 if(h->pps.transform_8x8_mode)
2026 h->dequant8_coeff[i][0][x] = 1<<6;
2033 * needs width/height
/* Allocate all per-stream tables that depend on picture dimensions and
 * precompute the mb->b / mb->b8 index remap tables.
 * CHECKED_ALLOCZ jumps to an (elided) fail label on OOM; free_tables()
 * cleans up. Returns 0 on success (return paths elided in this extract). */
2035 static int alloc_tables(H264Context *h){
2036 MpegEncContext * const s = &h->s;
/* one extra mb row so edge macroblocks can read "above" safely. */
2037 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2040 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2042 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2044 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2046 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2047 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2048 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so index -stride-1 is valid. */
2051 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2052 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2054 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2055 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2056 for(y=0; y<s->mb_height; y++){
2057 for(x=0; x<s->mb_width; x++){
2058 const int mb_xy= x + y*s->mb_stride;
2059 const int b_xy = 4*x + 4*y*h->b_stride;
2060 const int b8_xy= 2*x + 2*y*h->b8_stride;
2062 h->mb2b_xy [mb_xy]= b_xy;
2063 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is linesize-dependent; allocated lazily in frame_start(). */
2067 s->obmc_scratchpad = NULL;
2069 if(!h->dequant4_coeff[0])
2070 init_dequant_tables(h);
2079 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of `src` with a slave thread context `dst`
 * instead of reallocating them; only the per-thread scratchpad and the
 * prediction function table are (re)initialized per context. */
2081 static void clone_tables(H264Context *dst, H264Context *src){
2082 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2083 dst->non_zero_count = src->non_zero_count;
2084 dst->slice_table = src->slice_table;
2085 dst->cbp_table = src->cbp_table;
2086 dst->mb2b_xy = src->mb2b_xy;
2087 dst->mb2b8_xy = src->mb2b8_xy;
2088 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2089 dst->mvd_table[0] = src->mvd_table[0];
2090 dst->mvd_table[1] = src->mvd_table[1];
2091 dst->direct_table = src->direct_table;
/* not shared: allocated per thread in frame_start(). */
2093 dst->s.obmc_scratchpad = NULL;
2094 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2099 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the per-thread top-border buffers (16 luma + 8+8 chroma bytes
 * per macroblock column). Returns 0 on success (elided), -1 on OOM. */
2101 static int context_init(H264Context *h){
2102 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2107 return -1; // free_tables will clean up for us
/* Initialization shared by the decoder (and SVQ3): copies dimensions from
 * the AVCodecContext, sets up intra prediction and DSP functions, and
 * seeds flat (all-16) scaling matrices as the default. */
2110 static av_cold void common_init(H264Context *h){
2111 MpegEncContext * const s = &h->s;
2113 s->width = s->avctx->width;
2114 s->height = s->avctx->height;
2115 s->codec_id= s->avctx->codec->id;
2117 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no dequant tables built for any PPS yet". */
2119 h->dequant_coeff_pps= -1;
2120 s->unrestricted_mv=1;
2121 s->decode=1; //FIXME
2123 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* flat default scaling lists (value 16 == unity scale). */
2125 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2126 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: sets MpegEncContext defaults for H.264, chooses
 * the output pixel format, and detects AVCC-style (length-prefixed)
 * extradata. Several statements are elided in this extract. */
2129 static av_cold int decode_init(AVCodecContext *avctx){
2130 H264Context *h= avctx->priv_data;
2131 MpegEncContext * const s = &h->s;
2133 MPV_decode_defaults(s);
2138 s->out_format = FMT_H264;
2139 s->workaround_bugs= avctx->workaround_bugs;
2142 // s->decode_mb= ff_h263_decode_mb;
2143 s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) YUV; plain H.264 uses limited range. */
2146 if(avctx->codec_id == CODEC_ID_SVQ3)
2147 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2149 avctx->pix_fmt= PIX_FMT_YUV420P;
/* first extradata byte == 1 identifies avcC (MP4-style) configuration. */
2153 if(avctx->extradata_size > 0 && avctx->extradata &&
2154 *(char *)avctx->extradata == 1){
2161 h->thread_context[0] = h;
2162 h->outputed_poc = INT_MIN;
2163 h->prev_poc_msb= 1<<16;
/* Per-frame setup: starts the MPV frame and error resilience, precomputes
 * the per-block pixel offsets (frame and field variants), lazily allocates
 * the bi-pred scratchpad, and resets picture reference/poc state. */
2167 static int frame_start(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2171 if(MPV_frame_start(s, s->avctx) < 0)
2173 ff_er_frame_start(s);
2175 * MPV_frame_start uses pict_type to derive key_frame.
2176 * This is incorrect for H.264; IDR markings must be used.
2177 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2178 * See decode_nal_units().
2180 s->current_picture_ptr->key_frame= 0;
2182 assert(s->linesize && s->uvlinesize);
/* block_offset[0..15]: frame-mode luma 4x4 offsets in scan8 order;
 * block_offset[24..]: field-mode variants (doubled row stride). */
2184 for(i=0; i<16; i++){
2185 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2186 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2189 h->block_offset[16+i]=
2190 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191 h->block_offset[24+16+i]=
2192 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2195 /* can't be in alloc_tables because linesize isn't known there.
2196 * FIXME: redo bipred weight to not require extra buffer? */
2197 for(i = 0; i < s->avctx->thread_count; i++)
2198 if(!h->thread_context[i]->s.obmc_scratchpad)
2199 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2201 /* some macroblocks will be accessed before they're available */
2202 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2203 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2205 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2207 // We mark the current picture as non-reference after allocating it, so
2208 // that if we break out due to an error it can be released automatically
2209 // in the next MPV_frame_start().
2210 // SVQ3 as well as most other codecs have only last/next/current and thus
2211 // get released even with set reference, besides SVQ3 and others do not
2212 // mark frames as reference later "naturally".
2213 if(s->codec_id != CODEC_ID_SVQ3)
2214 s->current_picture_ptr->reference= 0;
/* field POCs are filled in later; INT_MAX = "not yet decoded". */
2216 s->current_picture_ptr->field_poc[0]=
2217 s->current_picture_ptr->field_poc[1]= INT_MAX;
2218 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row (top border for the MB below) and right-edge columns
 * (left border for deblocking) of the just-decoded macroblock into the
 * top_borders / left_border caches. `simple` skips the MBAFF paths. */
2223 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2224 MpegEncContext * const s = &h->s;
2233 src_cb -= uvlinesize;
2234 src_cr -= uvlinesize;
2236 if(!simple && FRAME_MBAFF){
2238 offset = MB_MBAFF ? 1 : 17;
2239 uvoffset= MB_MBAFF ? 1 : 9;
/* MBAFF: copy last rows as 64-bit chunks into the border cache. */
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2242 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2243 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2245 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2250 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2251 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2252 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2253 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2259 top_idx = MB_MBAFF ? 0 : 1;
2261 step= MB_MBAFF ? 2 : 1;
2264 // There are two lines saved, the line above the top macroblock of a pair,
2265 // and the line above the bottom macroblock
2266 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2267 for(i=1; i<17 - skiplast; i++){
2268 h->left_border[offset+i*step]= src_y[15+i* linesize];
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2272 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2274 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2275 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2276 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2277 for(i=1; i<9 - skiplast; i++){
2278 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2279 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2282 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Exchange (or copy, per the XCHG macro's `xchg` flag) the cached borders
 * with the picture edges around the current MB so that intra prediction
 * sees pre-deblocking pixels. Called with xchg=1 before and xchg=0 after
 * intra prediction when the deblocking filter is active. */
2286 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2287 MpegEncContext * const s = &h->s;
2298 if(!simple && FRAME_MBAFF){
2300 offset = MB_MBAFF ? 1 : 17;
2301 uvoffset= MB_MBAFF ? 1 : 9;
2305 top_idx = MB_MBAFF ? 0 : 1;
2307 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: do not filter across slice boundaries. */
2310 if(h->deblocking_filter == 2) {
2312 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2313 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2315 deblock_left = (s->mb_x > 0);
2316 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one row/column so the border pixels themselves are addressed. */
2319 src_y -= linesize + 1;
2320 src_cb -= uvlinesize + 1;
2321 src_cr -= uvlinesize + 1;
2323 #define XCHG(a,b,t,xchg)\
2330 for(i = !deblock_top; i<16; i++){
2331 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2333 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2338 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2339 if(s->mb_x+1 < s->mb_width){
2340 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2344 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2346 for(i = !deblock_top; i<8; i++){
2347 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2348 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2350 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2351 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2355 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction
 * or inter motion compensation, IDCT/residual addition for luma and
 * chroma, then border backup and in-loop deblocking. With simple=1 the
 * compiler drops the MBAFF / gray / SVQ3 / PCM paths at compile time. */
2360 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2361 MpegEncContext * const s = &h->s;
2362 const int mb_x= s->mb_x;
2363 const int mb_y= s->mb_y;
2364 const int mb_xy= h->mb_xy;
2365 const int mb_type= s->current_picture.mb_type[mb_xy];
2366 uint8_t *dest_y, *dest_cb, *dest_cr;
2367 int linesize, uvlinesize /*dct_offset*/;
2369 int *block_offset = &h->block_offset[0];
2370 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2371 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2372 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2373 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2375 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2376 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2377 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2379 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2380 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use field block offsets, and for the
 * bottom MB of a pair move the base pointers back up into the field. */
2382 if (!simple && MB_FIELD) {
2383 linesize = h->mb_linesize = s->linesize * 2;
2384 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2385 block_offset = &h->block_offset[24];
2386 if(mb_y&1){ //FIXME move out of this function?
2387 dest_y -= s->linesize*15;
2388 dest_cb-= s->uvlinesize*7;
2389 dest_cr-= s->uvlinesize*7;
/* remap ref indices so field parity is encoded in the cache values. */
2393 for(list=0; list<h->list_count; list++){
2394 if(!USES_LIST(mb_type, list))
2396 if(IS_16X16(mb_type)){
2397 int8_t *ref = &h->ref_cache[list][scan8[0]];
2398 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2400 for(i=0; i<16; i+=4){
2401 int ref = h->ref_cache[list][scan8[i]];
2403 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2409 linesize = h->mb_linesize = s->linesize;
2410 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2411 // dct_offset = s->linesize * 16;
/* I_PCM: raw samples were parsed into h->mb; copy them out verbatim. */
2414 if (!simple && IS_INTRA_PCM(mb_type)) {
2415 for (i=0; i<16; i++) {
2416 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2418 for (i=0; i<8; i++) {
2419 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2420 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2423 if(IS_INTRA(mb_type)){
/* borrow pre-deblocking neighbor pixels for intra prediction. */
2424 if(h->deblocking_filter)
2425 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2427 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2428 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2429 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2432 if(IS_INTRA4x4(mb_type)){
2433 if(simple || !s->encoding){
2434 if(IS_8x8DCT(mb_type)){
2435 if(transform_bypass){
2437 idct_add = s->dsp.add_pixels8;
2439 idct_dc_add = s->dsp.h264_idct8_dc_add;
2440 idct_add = s->dsp.h264_idct8_add;
2442 for(i=0; i<16; i+=4){
2443 uint8_t * const ptr= dest_y + block_offset[i];
2444 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) lossless: special prediction+add path. */
2445 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2446 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2448 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2449 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2450 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set: cheaper DC-only add. */
2452 if(nnz == 1 && h->mb[i*16])
2453 idct_dc_add(ptr, h->mb + i*16, linesize);
2455 idct_add (ptr, h->mb + i*16, linesize);
2460 if(transform_bypass){
2462 idct_add = s->dsp.add_pixels4;
2464 idct_dc_add = s->dsp.h264_idct_dc_add;
2465 idct_add = s->dsp.h264_idct_add;
2467 for(i=0; i<16; i++){
2468 uint8_t * const ptr= dest_y + block_offset[i];
2469 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2471 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2472 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* emulate missing top-right samples by replicating the last top pixel. */
2476 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2477 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2478 assert(mb_y || linesize <= block_offset[i]);
2479 if(!topright_avail){
2480 tr= ptr[3 - linesize]*0x01010101;
2481 topright= (uint8_t*) &tr;
2483 topright= ptr + 4 - linesize;
2487 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2488 nnz = h->non_zero_count_cache[ scan8[i] ];
2491 if(nnz == 1 && h->mb[i*16])
2492 idct_dc_add(ptr, h->mb + i*16, linesize);
2494 idct_add (ptr, h->mb + i*16, linesize);
2496 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2503 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2505 if(!transform_bypass)
2506 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2508 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2510 if(h->deblocking_filter)
2511 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation (with optional weighting). */
2513 hl_motion(h, dest_y, dest_cb, dest_cr,
2514 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2515 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2516 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-I4x4 macroblocks. */
2520 if(!IS_INTRA4x4(mb_type)){
2522 if(IS_INTRA16x16(mb_type)){
2523 if(transform_bypass){
2524 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2525 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2527 for(i=0; i<16; i++){
2528 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2529 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2533 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2535 }else if(h->cbp&15){
2536 if(transform_bypass){
2537 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2538 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2539 for(i=0; i<16; i+=di){
2540 if(h->non_zero_count_cache[ scan8[i] ]){
2541 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2545 if(IS_8x8DCT(mb_type)){
2546 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2548 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2553 for(i=0; i<16; i++){
2554 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2555 uint8_t * const ptr= dest_y + block_offset[i];
2556 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (cbp bits 4-5 = chroma coded). */
2562 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2563 uint8_t *dest[2] = {dest_cb, dest_cr};
2564 if(transform_bypass){
2565 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2566 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2567 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2569 idct_add = s->dsp.add_pixels4;
2570 for(i=16; i<16+8; i++){
2571 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2572 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* Cb and Cr can carry different chroma qp (qp offsets differ). */
2576 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2577 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2579 idct_add = s->dsp.h264_idct_add;
2580 idct_dc_add = s->dsp.h264_idct_dc_add;
2581 for(i=16; i<16+8; i++){
2582 if(h->non_zero_count_cache[ scan8[i] ])
2583 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584 else if(h->mb[i*16])
2585 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2588 for(i=16; i<16+8; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2590 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2591 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* finally: save borders for the next row and run the loop filter. */
2598 if(h->deblocking_filter) {
2599 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2600 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2601 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2602 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2603 if (!simple && FRAME_MBAFF) {
2604 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2606 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2612 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal with simple=1, letting the compiler
 * strip the MBAFF/gray/SVQ3/PCM branches. */
2614 static void hl_decode_mb_simple(H264Context *h){
2615 hl_decode_mb_internal(h, 1);
2619 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: full hl_decode_mb_internal; av_noinline keeps the large
 * body out of the hot caller. */
2621 static void av_noinline hl_decode_mb_complex(H264Context *h){
2622 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant
 * based on stream features that require the uncommon code paths. */
2625 static void hl_decode_mb(H264Context *h){
2626 MpegEncContext * const s = &h->s;
2627 const int mb_xy= h->mb_xy;
2628 const int mb_type= s->current_picture.mb_type[mb_xy];
2629 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2631 if(ENABLE_H264_ENCODER && !s->decode)
2635 hl_decode_mb_complex(h);
2636 else hl_decode_mb_simple(h);
/* Convert a frame Picture in place into a single-field view: for the
 * bottom field, shift the plane pointers down one line; double the
 * linesizes so only every second line is addressed; set the field POC. */
2639 static void pic_as_field(Picture *pic, const int parity){
2641 for (i = 0; i < 4; ++i) {
2642 if (parity == PICT_BOTTOM_FIELD)
2643 pic->data[i] += pic->linesize[i];
/* reference is re-set each iteration; harmless but per-plane redundant. */
2644 pic->reference = parity;
2645 pic->linesize[i] *= 2;
2647 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy `src` into `dest` if its reference marking matches `parity`,
 * converting to a field view when parity is a single field; returns
 * non-zero when a copy was made. (Copy statement elided in this extract.) */
2650 static int split_field_copy(Picture *dest, Picture *src,
2651 int parity, int id_add){
2652 int match = !!(src->reference & parity);
2656 if(parity != PICT_FRAME){
2657 pic_as_field(dest, parity);
2659 dest->pic_id += id_add;
/* Build a default reference list segment from `in`, alternating between
 * pictures whose reference marking matches `sel` and those matching the
 * opposite parity (sel^3), per H.264 field reference list construction.
 * pic_id is the long-term index or frame_num depending on `is_long`. */
2666 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2670 while(i[0]<len || i[1]<len){
2671 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2673 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2676 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2677 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2680 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2681 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort `src` into `sorted` by POC, taking only pictures on one
 * side of `limit`: dir=0 selects descending POCs below limit, dir=1
 * ascending POCs above it (used for B-frame list ordering). Returns the
 * number of pictures emitted. */
2688 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2693 best_poc= dir ? INT_MIN : INT_MAX;
2695 for(i=0; i<len; i++){
2696 const int poc= src[i]->poc;
2697 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2699 sorted[out_i]= src[i];
2702 if(best_poc == (dir ? INT_MIN : INT_MAX))
2704 limit= sorted[out_i++]->poc - dir;
2710 * fills the default_ref_list.
/* Construct the default (unreordered) reference picture lists.
 * B slices: short-term refs sorted around the current POC (past-first for
 * list0, future-first for list1) followed by long-term refs; if both
 * lists come out identical, the first two list1 entries are swapped.
 * P slices: short-term by frame_num order, then long-term. */
2712 static int fill_default_ref_list(H264Context *h){
2713 MpegEncContext * const s = &h->s;
2716 if(h->slice_type_nos==FF_B_TYPE){
2717 Picture *sorted[32];
2722 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2724 cur_poc= s->current_picture_ptr->poc;
2726 for(list= 0; list<2; list++){
2727 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2728 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2730 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2731 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2734 if(len < h->ref_count[list])
2735 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 == list0 and has >1 entry, swap its first two entries. */
2739 if(lens[0] == lens[1] && lens[1] > 1){
2740 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2742 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2745 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2746 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2748 if(len < h->ref_count[0])
2749 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2752 for (i=0; i<h->ref_count[0]; i++) {
2753 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2755 if(h->slice_type_nos==FF_B_TYPE){
2756 for (i=0; i<h->ref_count[1]; i++) {
2757 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2764 static void print_short_term(H264Context *h);
2765 static void print_long_term(H264Context *h);
2768 * Extract structure information about the picture described by pic_num in
2769 * the current decoding context (frame or field). Note that pic_num is
2770 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2771 * @param pic_num picture number for which to extract structure information
2772 * @param structure one of PICT_XXX describing structure of picture
2774 * @return frame number (short term) or long term index of picture
2775 * described by pic_num
/* Split a field-coded pic_num into its picture number and structure:
 * even/odd parity (condition elided here) selects same or opposite field
 * relative to the current picture structure. */
2777 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2778 MpegEncContext * const s = &h->s;
2780 *structure = s->picture_structure;
2783 /* opposite field */
2784 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering syntax (H.264 spec 7.3.3.1 / 8.2.4.3):
 * start from the default lists, then for each reordering command locate
 * the named short- or long-term picture and move it to the current index,
 * shifting the remaining entries down. Returns 0 on success, -1 on
 * bitstream errors. */
2791 static int decode_ref_pic_list_reordering(H264Context *h){
2792 MpegEncContext * const s = &h->s;
2793 int list, index, pic_structure;
2795 print_short_term(h);
2798 for(list=0; list<h->list_count; list++){
2799 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2801 if(get_bits1(&s->gb)){
2802 int pred= h->curr_pic_num;
2804 for(index=0; ; index++){
2805 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2806 unsigned int pic_id;
2808 Picture *ref = NULL;
/* idc==3 terminates the reordering loop. */
2810 if(reordering_of_pic_nums_idc==3)
2813 if(index >= h->ref_count[list]){
2814 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2818 if(reordering_of_pic_nums_idc<3){
2819 if(reordering_of_pic_nums_idc<2){
/* idc 0/1: short-term, diff applied to the running prediction. */
2820 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2823 if(abs_diff_pic_num > h->max_pic_num){
2824 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2828 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2829 else pred+= abs_diff_pic_num;
/* modular wrap, max_pic_num is a power of two. */
2830 pred &= h->max_pic_num - 1;
2832 frame_num = pic_num_extract(h, pred, &pic_structure);
2834 for(i= h->short_ref_count-1; i>=0; i--){
2835 ref = h->short_ref[i];
2836 assert(ref->reference);
2837 assert(!ref->long_ref);
2839 ref->frame_num == frame_num &&
2840 (ref->reference & pic_structure)
/* idc 2: long-term reference by index. */
2848 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2850 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2853 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2856 ref = h->long_ref[long_idx];
2857 assert(!(ref && !ref->reference));
2858 if(ref && (ref->reference & pic_structure)){
2859 ref->pic_id= pic_id;
2860 assert(ref->long_ref);
2868 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2869 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert `ref` at `index`, shifting duplicates out of the list. */
2871 for(i=index; i+1<h->ref_count[list]; i++){
2872 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2875 for(; i > index; i--){
2876 h->ref_list[list][i]= h->ref_list[list][i-1];
2878 h->ref_list[list][index]= *ref;
2880 pic_as_field(&h->ref_list[list][index], pic_structure);
2884 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* any still-empty slot is an error; patch with the current picture. */
2890 for(list=0; list<h->list_count; list++){
2891 for(index= 0; index < h->ref_count[list]; index++){
2892 if(!h->ref_list[list][index].data[0]){
2893 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2894 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF: derive per-field reference entries (at indices 16+2*i and
 * 16+2*i+1) from each frame reference, and duplicate the corresponding
 * explicit and implicit weights/offsets for the field entries. */
2902 static void fill_mbaff_ref_list(H264Context *h){
2904 for(list=0; list<2; list++){ //FIXME try list_count
2905 for(i=0; i<h->ref_count[list]; i++){
2906 Picture *frame = &h->ref_list[list][i];
2907 Picture *field = &h->ref_list[list][16+2*i];
/* field views address every second line of the frame planes. */
2910 field[0].linesize[j] <<= 1;
2911 field[0].reference = PICT_TOP_FIELD;
2912 field[0].poc= field[0].field_poc[0];
2913 field[1] = field[0];
2915 field[1].data[j] += frame->linesize[j];
2916 field[1].reference = PICT_BOTTOM_FIELD;
2917 field[1].poc= field[1].field_poc[1];
2919 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2920 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2922 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2923 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2927 for(j=0; j<h->ref_count[1]; j++){
2928 for(i=0; i<h->ref_count[0]; i++)
2929 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2930 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2931 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the explicit weighted-prediction table (pred_weight_table(),
 * H.264 spec 7.3.3.2): per-reference luma/chroma weights and offsets,
 * falling back to the denominators' defaults when a flag is unset.
 * Sets h->use_weight / h->use_weight_chroma accordingly. */
2935 static int pred_weight_table(H264Context *h){
2936 MpegEncContext * const s = &h->s;
2938 int luma_def, chroma_def;
2941 h->use_weight_chroma= 0;
2942 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2943 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight = 1.0 expressed in the denominator's fixed point. */
2944 luma_def = 1<<h->luma_log2_weight_denom;
2945 chroma_def = 1<<h->chroma_log2_weight_denom;
2947 for(list=0; list<2; list++){
2948 for(i=0; i<h->ref_count[list]; i++){
2949 int luma_weight_flag, chroma_weight_flag;
2951 luma_weight_flag= get_bits1(&s->gb);
2952 if(luma_weight_flag){
2953 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2954 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2955 if( h->luma_weight[list][i] != luma_def
2956 || h->luma_offset[list][i] != 0)
2959 h->luma_weight[list][i]= luma_def;
2960 h->luma_offset[list][i]= 0;
2964 chroma_weight_flag= get_bits1(&s->gb);
2965 if(chroma_weight_flag){
2968 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2969 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2970 if( h->chroma_weight[list][i][j] != chroma_def
2971 || h->chroma_offset[list][i][j] != 0)
2972 h->use_weight_chroma= 1;
2977 h->chroma_weight[list][i][j]= chroma_def;
2978 h->chroma_offset[list][i][j]= 0;
/* list1 only exists for B slices. */
2983 if(h->slice_type_nos != FF_B_TYPE) break;
2985 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Compute implicit bi-prediction weights from POC distances (H.264 spec
 * 8.4.2.3.2): weight = 64 - dist_scale_factor, clamped to 32/32 when the
 * scale factor is out of range or the refs bracket the current POC evenly. */
2989 static void implicit_weight_table(H264Context *h){
2990 MpegEncContext * const s = &h->s;
2992 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: weighting is a plain average. */
2994 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2995 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2997 h->use_weight_chroma= 0;
3002 h->use_weight_chroma= 2;
3003 h->luma_log2_weight_denom= 5;
3004 h->chroma_log2_weight_denom= 5;
3006 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3007 int poc0 = h->ref_list[0][ref0].poc;
3008 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3009 int poc1 = h->ref_list[1][ref1].poc;
3010 int td = av_clip(poc1 - poc0, -128, 127);
3012 int tb = av_clip(cur_poc - poc0, -128, 127);
3013 int tx = (16384 + (FFABS(td) >> 1)) / td;
3014 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3015 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3016 h->implicit_weight[ref0][ref1] = 32;
3018 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3020 h->implicit_weight[ref0][ref1] = 32;
3026 * Mark a picture as no longer needed for reference. The refmask
3027 * argument allows unreferencing of individual fields or the whole frame.
3028 * If the picture becomes entirely unreferenced, but is being held for
3029 * display purposes, it is marked as such.
3030 * @param refmask mask of fields to unreference; the mask is bitwise
3031 * anded with the reference marking of pic
3032 * @return non-zero if pic becomes entirely unreferenced (except possibly
3033 * for display purposes) zero if one of the fields remains in
3036 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* clear the masked field bits; if any reference bit survives, pic stays */
3038 if (pic->reference &= refmask) {
/* pic fully unreferenced: if it is still queued for output, keep it
 * alive with the special DELAYED_PIC_REF marking */
3041 for(i = 0; h->delayed_pic[i]; i++)
3042 if(pic == h->delayed_pic[i]){
3043 pic->reference=DELAYED_PIC_REF;
3051 * instantaneous decoder refresh.
/* Empties both reference lists and resets frame-number tracking, as
 * required when an IDR slice is decoded. */
3053 static void idr(H264Context *h){
/* drop all long-term references (16 slots) */
3056 for(i=0; i<16; i++){
3057 remove_long(h, i, 0);
3059 assert(h->long_ref_count==0);
/* drop all short-term references */
3061 for(i=0; i<h->short_ref_count; i++){
3062 unreference_pic(h, h->short_ref[i], 0);
3063 h->short_ref[i]= NULL;
3065 h->short_ref_count=0;
3066 h->prev_frame_num= 0;
3067 h->prev_frame_num_offset= 0;
3072 /* forget old pics after a seek */
/* AVCodecContext.flush callback: clears the delayed-output queue, the
 * current picture and first-field state, then flushes the shared
 * mpegvideo state. */
3073 static void flush_dpb(AVCodecContext *avctx){
3074 H264Context *h= avctx->priv_data;
3076 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3077 if(h->delayed_pic[i])
3078 h->delayed_pic[i]->reference= 0;
3079 h->delayed_pic[i]= NULL;
/* INT_MIN so any real POC compares as newer than "nothing output yet" */
3081 h->outputed_poc= INT_MIN;
3083 if(h->s.current_picture_ptr)
3084 h->s.current_picture_ptr->reference= 0;
3085 h->s.first_field= 0;
3086 ff_mpeg_flush(avctx);
3090 * Find a Picture in the short term reference list by frame number.
3091 * @param frame_num frame number to search for
3092 * @param idx the index into h->short_ref where returned picture is found
3093 * undefined if no picture found.
3094 * @return pointer to the found picture, or NULL if no pic with the provided
3095 * frame number is found
3097 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3098 MpegEncContext * const s = &h->s;
/* linear scan; the short-term list is small (<= 16 entries) */
3101 for(i=0; i<h->short_ref_count; i++){
3102 Picture *pic= h->short_ref[i];
3103 if(s->avctx->debug&FF_DEBUG_MMCO)
3104 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3105 if(pic->frame_num == frame_num) {
3114 * Remove a picture from the short term reference list by its index in
3115 * that list. This does no checking on the provided index; it is assumed
3116 * to be valid. Other list entries are shifted down.
3117 * @param i index into h->short_ref of picture to remove.
3119 static void remove_short_at_index(H264Context *h, int i){
3120 assert(i >= 0 && i < h->short_ref_count);
3121 h->short_ref[i]= NULL;
/* close the gap so the list stays densely packed */
3122 if (--h->short_ref_count)
3123 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3128 * @return the removed picture or NULL if an error occurs
/* Looks up frame_num in the short-term list, unreferences the masked
 * fields, and drops the list entry if the picture becomes unreferenced. */
3130 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3131 MpegEncContext * const s = &h->s;
3135 if(s->avctx->debug&FF_DEBUG_MMCO)
3136 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3138 pic = find_short(h, frame_num, &i);
3140 if(unreference_pic(h, pic, ref_mask))
3141 remove_short_at_index(h, i);
3148 * Remove a picture from the long term reference list by its index in
3150 * @return the removed picture or NULL if an error occurs
/* Unreferences the masked fields of long_ref[i]; only clears the slot and
 * the long_ref flag once the picture is entirely unreferenced. */
3152 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3155 pic= h->long_ref[i];
3157 if(unreference_pic(h, pic, ref_mask)){
3158 assert(h->long_ref[i]->long_ref == 1);
3159 h->long_ref[i]->long_ref= 0;
3160 h->long_ref[i]= NULL;
3161 h->long_ref_count--;
3169 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3171 static void print_short_term(H264Context *h) {
3173 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3174 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3175 for(i=0; i<h->short_ref_count; i++){
3176 Picture *pic= h->short_ref[i];
3177 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3183 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3185 static void print_long_term(H264Context *h) {
3187 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3188 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3189 for(i = 0; i < 16; i++){
3190 Picture *pic= h->long_ref[i];
3192 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3199 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO commands (see decode_ref_pic_marking) to the
 * short-term and long-term reference lists, then performs the default
 * sliding-window / second-field bookkeeping and enforces the SPS
 * ref_frame_count limit.  See H.264 spec 8.2.5. */
3201 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3202 MpegEncContext * const s = &h->s;
3204 int current_ref_assigned=0;
3207 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3208 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3210 for(i=0; i<mmco_count; i++){
3211 int structure, frame_num;
3212 if(s->avctx->debug&FF_DEBUG_MMCO)
3213 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* both short-ref opcodes need the target picture located up-front */
3215 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3216 || mmco[i].opcode == MMCO_SHORT2LONG){
3217 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3218 pic = find_short(h, frame_num, &j);
3220 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3221 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3222 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3227 switch(mmco[i].opcode){
3228 case MMCO_SHORT2UNUSED:
3229 if(s->avctx->debug&FF_DEBUG_MMCO)
3230 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the addressed field(s) only */
3231 remove_short(h, frame_num, structure ^ PICT_FRAME);
3233 case MMCO_SHORT2LONG:
/* move a short-term picture into long-term slot long_arg, evicting
 * any different occupant first */
3234 if (h->long_ref[mmco[i].long_arg] != pic)
3235 remove_long(h, mmco[i].long_arg, 0);
3237 remove_short_at_index(h, j);
3238 h->long_ref[ mmco[i].long_arg ]= pic;
3239 if (h->long_ref[ mmco[i].long_arg ]){
3240 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3241 h->long_ref_count++;
3244 case MMCO_LONG2UNUSED:
3245 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3246 pic = h->long_ref[j];
3248 remove_long(h, j, structure ^ PICT_FRAME);
3249 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3250 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3253 // Comment below left from previous code as it is an interresting note.
3254 /* First field in pair is in short term list or
3255 * at a different long term index.
3256 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3257 * Report the problem and keep the pair where it is,
3258 * and mark this field valid.
/* (MMCO_LONG) assign the current picture to long-term slot long_arg */
3261 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3262 remove_long(h, mmco[i].long_arg, 0);
3264 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3265 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3266 h->long_ref_count++;
3269 s->current_picture_ptr->reference |= s->picture_structure;
3270 current_ref_assigned=1;
3272 case MMCO_SET_MAX_LONG:
3273 assert(mmco[i].long_arg <= 16);
3274 // just remove the long term which index is greater than new max
3275 for(j = mmco[i].long_arg; j<16; j++){
3276 remove_long(h, j, 0);
/* (MMCO_RESET) drop every reference picture */
3280 while(h->short_ref_count){
3281 remove_short(h, h->short_ref[0]->frame_num, 0);
3283 for(j = 0; j < 16; j++) {
3284 remove_long(h, j, 0);
3286 s->current_picture_ptr->poc=
3287 s->current_picture_ptr->field_poc[0]=
3288 s->current_picture_ptr->field_poc[1]=
3292 s->current_picture_ptr->frame_num= 0;
3298 if (!current_ref_assigned) {
3299 /* Second field of complementary field pair; the first field of
3300 * which is already referenced. If short referenced, it
3301 * should be first entry in short_ref. If not, it must exist
3302 * in long_ref; trying to put it on the short list here is an
3303 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3305 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3306 /* Just mark the second field valid */
3307 s->current_picture_ptr->reference = PICT_FRAME;
3308 } else if (s->current_picture_ptr->long_ref) {
3309 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3310 "assignment for second field "
3311 "in complementary field pair "
3312 "(first field is long term)\n");
/* default: insert the current picture at the head of the
 * short-term list */
3314 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3316 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3319 if(h->short_ref_count)
3320 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3322 h->short_ref[0]= s->current_picture_ptr;
3323 h->short_ref_count++;
3324 s->current_picture_ptr->reference |= s->picture_structure;
/* safety net against corrupt streams: never exceed the SPS limit */
3328 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3330 /* We have too many reference frames, probably due to corrupted
3331 * stream. Need to discard one frame. Prevents overrun of the
3332 * short_ref and long_ref buffers.
3334 av_log(h->s.avctx, AV_LOG_ERROR,
3335 "number of reference frames exceeds max (probably "
3336 "corrupt input), discarding one\n");
3338 if (h->long_ref_count && !h->short_ref_count) {
3339 for (i = 0; i < 16; ++i)
3344 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3346 pic = h->short_ref[h->short_ref_count - 1];
3347 remove_short(h, pic->frame_num, 0);
3351 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices only no_output_of_prior_pics / long_term_reference_flag
 * style handling is done; otherwise the adaptive MMCO command list is read,
 * or a sliding-window MMCO_SHORT2UNUSED is synthesized when the reference
 * buffer is full.  Commands are executed later by execute_ref_pic_marking().
 */
3356 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3357 MpegEncContext * const s = &h->s;
3361 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3362 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: keep the IDR picture as long-term index 0 */
3364 h->mmco[0].opcode= MMCO_LONG;
3365 h->mmco[0].long_arg= 0;
3369 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3370 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3371 MMCOOpcode opcode= get_ue_golomb(gb);
3373 h->mmco[i].opcode= opcode;
3374 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, converted to an absolute pic num */
3375 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3376 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3377 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3381 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3382 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures address up to 32 long-term field indices */
3383 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3384 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3387 h->mmco[i].long_arg= long_arg;
3390 if(opcode > (unsigned)MMCO_LONG){
3391 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3394 if(opcode == MMCO_END)
/* sliding-window mode: synthesize removal of the oldest short ref
 * once the buffer is full */
3399 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3401 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3402 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3403 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3404 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3406 if (FIELD_PICTURE) {
/* in field mode both fields of the frame must be unreferenced */
3407 h->mmco[0].short_pic_num *= 2;
3408 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3409 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for the
 * three SPS POC modes (H.264 spec 8.2.1):
 *   type 0 - explicit poc_lsb with msb wraparound tracking,
 *   type 1 - frame_num based with per-cycle expected deltas,
 *   type 2 - POC derived directly from frame_num.
 * Stores the per-field POCs and their minimum in the current Picture.
 */
3419 static int init_poc(H264Context *h){
3420 MpegEncContext * const s = &h->s;
3421 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3423 Picture *cur = s->current_picture_ptr;
3425 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped around since the previous picture */
3426 if(h->frame_num < h->prev_frame_num)
3427 h->frame_num_offset += max_frame_num;
3429 if(h->sps.poc_type==0){
3430 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wraparound in either direction (spec 8-3) */
3432 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3433 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3434 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3435 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3437 h->poc_msb = h->prev_poc_msb;
3438 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3440 field_poc[1] = h->poc_msb + h->poc_lsb;
3441 if(s->picture_structure == PICT_FRAME)
3442 field_poc[1] += h->delta_poc_bottom;
3443 }else if(h->sps.poc_type==1){
3444 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3447 if(h->sps.poc_cycle_length != 0)
3448 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures count as one step earlier in the cycle */
3452 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3455 expected_delta_per_poc_cycle = 0;
3456 for(i=0; i < h->sps.poc_cycle_length; i++)
3457 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3459 if(abs_frame_num > 0){
3460 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3461 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3463 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3464 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3465 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3469 if(h->nal_ref_idc == 0)
3470 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3472 field_poc[0] = expectedpoc + h->delta_poc[0];
3473 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3475 if(s->picture_structure == PICT_FRAME)
3476 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC follows decoding order directly */
3478 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of the field(s) actually being decoded */
3487 if(s->picture_structure != PICT_BOTTOM_FIELD)
3488 s->current_picture_ptr->field_poc[0]= field_poc[0];
3489 if(s->picture_structure != PICT_TOP_FIELD)
3490 s->current_picture_ptr->field_poc[1]= field_poc[1];
3491 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3498 * initialize scan tables
/* Builds the per-context zigzag/field scan tables.  When the DSP uses a
 * non-C IDCT with a permuted coefficient layout, the scans are transposed
 * accordingly; the _q0 variants always point at the unpermuted tables and
 * are used for lossless (transform-bypass) blocks. */
3500 static void init_scan_tables(H264Context *h){
3501 MpegEncContext * const s = &h->s;
/* C IDCT: natural coefficient order, copy the reference tables as-is */
3503 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3504 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3505 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3507 for(i=0; i<16; i++){
/* swap row/column nibbles: transpose a 4x4 scan position */
3508 #define T(x) (x>>2) | ((x<<2) & 0xF)
3509 h->zigzag_scan[i] = T(zigzag_scan[i]);
3510 h-> field_scan[i] = T( field_scan[i]);
3514 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3515 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3516 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3517 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3518 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3520 for(i=0; i<64; i++){
/* transpose an 8x8 scan position */
3521 #define T(x) (x>>3) | ((x&7)<<3)
3522 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3523 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3524 h->field_scan8x8[i] = T(field_scan8x8[i]);
3525 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3529 if(h->sps.transform_bypass){ //FIXME same ugly
3530 h->zigzag_scan_q0 = zigzag_scan;
3531 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3532 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3533 h->field_scan_q0 = field_scan;
3534 h->field_scan8x8_q0 = field_scan8x8;
3535 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3537 h->zigzag_scan_q0 = h->zigzag_scan;
3538 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3539 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3540 h->field_scan_q0 = h->field_scan;
3541 h->field_scan8x8_q0 = h->field_scan8x8;
3542 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3547 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-frame state a slice-thread context needs:
 * current picture pointers, stride info, POC/frame_num history, the
 * reference lists and the dequant tables.  dst and src must belong to
 * the same decoder instance. */
3549 static void clone_slice(H264Context *dst, H264Context *src)
3551 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3552 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3553 dst->s.current_picture = src->s.current_picture;
3554 dst->s.linesize = src->s.linesize;
3555 dst->s.uvlinesize = src->s.uvlinesize;
3556 dst->s.first_field = src->s.first_field;
3558 dst->prev_poc_msb = src->prev_poc_msb;
3559 dst->prev_poc_lsb = src->prev_poc_lsb;
3560 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3561 dst->prev_frame_num = src->prev_frame_num;
3562 dst->short_ref_count = src->short_ref_count;
3564 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3565 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3566 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3567 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3569 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3570 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3574 * decodes a slice header.
3575 * This will also call MPV_common_init() and frame_start() as needed.
3577 * @param h h264context
3578 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3580 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3582 static int decode_slice_header(H264Context *h, H264Context *h0){
3583 MpegEncContext * const s = &h->s;
3584 MpegEncContext * const s0 = &h0->s;
3585 unsigned int first_mb_in_slice;
3586 unsigned int pps_id;
3587 int num_ref_idx_active_override_flag;
3588 unsigned int slice_type, tmp, i, j;
3589 int default_ref_list_done = 0;
3590 int last_pic_structure;
/* a picture that is never referenced may be dropped without harm */
3592 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheaper 2-tap qpel interpolation for droppable frames */
3594 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3595 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3596 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3598 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3599 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3602 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first MB of a new picture: reset slice counter and current picture */
3604 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3605 h0->current_slice = 0;
3606 if (!s0->first_field)
3607 s->current_picture_ptr= NULL;
3610 slice_type= get_ue_golomb(&s->gb);
3612 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed" slice type for the whole picture */
3617 h->slice_type_fixed=1;
3619 h->slice_type_fixed=0;
3621 slice_type= golomb_to_pict_type[ slice_type ];
/* default ref list can be reused if the slice type did not change */
3622 if (slice_type == FF_I_TYPE
3623 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3624 default_ref_list_done = 1;
3626 h->slice_type= slice_type;
3627 h->slice_type_nos= slice_type & 3;
3629 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3630 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3631 av_log(h->s.avctx, AV_LOG_ERROR,
3632 "B picture before any references, skipping\n");
/* activate the referenced PPS and its SPS */
3636 pps_id= get_ue_golomb(&s->gb);
3637 if(pps_id>=MAX_PPS_COUNT){
3638 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3641 if(!h0->pps_buffers[pps_id]) {
3642 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3645 h->pps= *h0->pps_buffers[pps_id];
3647 if(!h0->sps_buffers[h->pps.sps_id]) {
3648 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3651 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS; rebuild only on the master context */
3653 if(h == h0 && h->dequant_coeff_pps != pps_id){
3654 h->dequant_coeff_pps = pps_id;
3655 init_dequant_tables(h);
/* derive picture dimensions from the SPS (cropping applied below) */
3658 s->mb_width= h->sps.mb_width;
3659 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3661 h->b_stride= s->mb_width*4;
3662 h->b8_stride= s->mb_width*2;
3664 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3665 if(h->sps.frame_mbs_only_flag)
3666 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3668 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3670 if (s->context_initialized
3671 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3673 return -1; // width / height changed during parallelized decoding
3675 flush_dpb(s->avctx);
3678 if (!s->context_initialized) {
3680 return -1; // we cant (re-)initialize context during parallel decoding
3681 if (MPV_common_init(s) < 0)
3685 init_scan_tables(h);
/* set up one H264Context per slice thread, cloning the mpegvideo part */
3688 for(i = 1; i < s->avctx->thread_count; i++) {
3690 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3691 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3692 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3695 init_scan_tables(c);
3699 for(i = 0; i < s->avctx->thread_count; i++)
3700 if(context_init(h->thread_context[i]) < 0)
3703 s->avctx->width = s->width;
3704 s->avctx->height = s->height;
3705 s->avctx->sample_aspect_ratio= h->sps.sar;
3706 if(!s->avctx->sample_aspect_ratio.den)
3707 s->avctx->sample_aspect_ratio.den = 1;
3709 if(h->sps.timing_info_present_flag){
3710 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around old x264 writing wrong timing info */
3711 if(h->x264_build > 0 && h->x264_build < 44)
3712 s->avctx->time_base.den *= 2;
3713 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3714 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3718 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* determine picture structure (frame / top field / bottom field) */
3721 h->mb_aff_frame = 0;
3722 last_pic_structure = s0->picture_structure;
3723 if(h->sps.frame_mbs_only_flag){
3724 s->picture_structure= PICT_FRAME;
3726 if(get_bits1(&s->gb)) { //field_pic_flag
3727 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3729 s->picture_structure= PICT_FRAME;
3730 h->mb_aff_frame = h->sps.mb_aff;
3733 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3735 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating placeholder reference frames */
3736 while(h->frame_num != h->prev_frame_num &&
3737 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3738 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3740 h->prev_frame_num++;
3741 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3742 s->current_picture_ptr->frame_num= h->prev_frame_num;
3743 execute_ref_pic_marking(h, NULL, 0);
3746 /* See if we have a decoded first field looking for a pair... */
3747 if (s0->first_field) {
3748 assert(s0->current_picture_ptr);
3749 assert(s0->current_picture_ptr->data[0]);
3750 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3752 /* figure out if we have a complementary field pair */
3753 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3755 * Previous field is unmatched. Don't display it, but let it
3756 * remain for reference if marked as such.
3758 s0->current_picture_ptr = NULL;
3759 s0->first_field = FIELD_PICTURE;
3762 if (h->nal_ref_idc &&
3763 s0->current_picture_ptr->reference &&
3764 s0->current_picture_ptr->frame_num != h->frame_num) {
3766 * This and previous field were reference, but had
3767 * different frame_nums. Consider this field first in
3768 * pair. Throw away previous field except for reference
3771 s0->first_field = 1;
3772 s0->current_picture_ptr = NULL;
3775 /* Second field in complementary pair */
3776 s0->first_field = 0;
3781 /* Frame or first field in a potentially complementary pair */
3782 assert(!s0->current_picture_ptr);
3783 s0->first_field = FIELD_PICTURE;
3786 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3787 s0->first_field = 0;
3794 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3796 assert(s->mb_num == s->mb_width * s->mb_height);
3797 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3798 first_mb_in_slice >= s->mb_num){
3799 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3802 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3803 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3804 if (s->picture_structure == PICT_BOTTOM_FIELD)
3805 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3806 assert(s->mb_y < s->mb_height);
/* pic num range doubles in field mode (one num per field) */
3808 if(s->picture_structure==PICT_FRAME){
3809 h->curr_pic_num= h->frame_num;
3810 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3812 h->curr_pic_num= 2*h->frame_num + 1;
3813 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3816 if(h->nal_unit_type == NAL_IDR_SLICE){
3817 get_ue_golomb(&s->gb); /* idr_pic_id */
/* POC related slice header fields (see init_poc) */
3820 if(h->sps.poc_type==0){
3821 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3823 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3824 h->delta_poc_bottom= get_se_golomb(&s->gb);
3828 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3829 h->delta_poc[0]= get_se_golomb(&s->gb);
3831 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3832 h->delta_poc[1]= get_se_golomb(&s->gb);
3837 if(h->pps.redundant_pic_cnt_present){
3838 h->redundant_pic_count= get_ue_golomb(&s->gb);
3841 //set defaults, might be overridden a few lines later
3842 h->ref_count[0]= h->pps.ref_count[0];
3843 h->ref_count[1]= h->pps.ref_count[1];
3845 if(h->slice_type_nos != FF_I_TYPE){
3846 if(h->slice_type_nos == FF_B_TYPE){
3847 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3849 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3851 if(num_ref_idx_active_override_flag){
3852 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3853 if(h->slice_type_nos==FF_B_TYPE)
3854 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3856 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3857 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3858 h->ref_count[0]= h->ref_count[1]= 1;
3862 if(h->slice_type_nos == FF_B_TYPE)
/* build/reorder reference lists for this slice */
3869 if(!default_ref_list_done){
3870 fill_default_ref_list(h);
3873 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3876 if(h->slice_type_nos!=FF_I_TYPE){
3877 s->last_picture_ptr= &h->ref_list[0][0];
3878 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3880 if(h->slice_type_nos==FF_B_TYPE){
3881 s->next_picture_ptr= &h->ref_list[1][0];
3882 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* weighted prediction: explicit table, implicit table, or none */
3885 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3886 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3887 pred_weight_table(h);
3888 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3889 implicit_weight_table(h);
3894 decode_ref_pic_marking(h0, &s->gb);
3897 fill_mbaff_ref_list(h);
3899 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3900 direct_dist_scale_factor(h);
3901 direct_ref_list_init(h);
3903 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3904 tmp = get_ue_golomb(&s->gb);
3906 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3909 h->cabac_init_idc= tmp;
3912 h->last_qscale_diff = 0;
3913 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3915 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3919 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3920 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3921 //FIXME qscale / qp ... stuff
3922 if(h->slice_type == FF_SP_TYPE){
3923 get_bits1(&s->gb); /* sp_for_switch_flag */
3925 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3926 get_se_golomb(&s->gb); /* slice_qs_delta */
/* deblocking filter control */
3929 h->deblocking_filter = 1;
3930 h->slice_alpha_c0_offset = 0;
3931 h->slice_beta_offset = 0;
3932 if( h->pps.deblocking_filter_parameters_present ) {
3933 tmp= get_ue_golomb(&s->gb);
3935 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream uses 0=on 1=off; internally 1=on 0=off */
3938 h->deblocking_filter= tmp;
3939 if(h->deblocking_filter < 2)
3940 h->deblocking_filter^= 1; // 1<->0
3942 if( h->deblocking_filter ) {
3943 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3944 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3948 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3949 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3950 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3951 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3952 h->deblocking_filter= 0;
/* cross-slice deblocking cannot be parallelized */
3954 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3955 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3956 /* Cheat slightly for speed:
3957 Do not bother to deblock across slices. */
3958 h->deblocking_filter = 2;
3960 h0->max_contexts = 1;
3961 if(!h0->single_decode_warning) {
3962 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3963 h0->single_decode_warning = 1;
3966 return 1; // deblocking switched inside frame
/* NOTE(review): the `?` below is not valid C; this line presumably sits
 * inside a disabled (#if 0) region in the full file -- confirm upstream */
3971 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3972 slice_group_change_cycle= get_bits(&s->gb, ?);
3975 h0->last_slice_type = slice_type;
3976 h->slice_num = ++h0->current_slice;
3977 if(h->slice_num >= MAX_SLICES){
3978 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* precompute ref-index -> frame mapping for the deblocking filter */
3982 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3986 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3987 +(h->ref_list[j][i].reference&3);
3990 for(i=16; i<48; i++)
3991 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3992 +(h->ref_list[j][i].reference&3);
3995 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3996 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3998 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3999 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4001 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4003 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4004 pps_id, h->frame_num,
4005 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4006 h->ref_count[0], h->ref_count[1],
4008 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4010 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4011 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit in the bit reader's cache (spec 9.2.2.1). */
4021 static inline int get_level_prefix(GetBitContext *gb){
4025 OPEN_READER(re, gb);
4026 UPDATE_CACHE(re, gb);
4027 buf=GET_CACHE(re, gb);
/* position of the highest set bit gives the run of leading zeros */
4029 log= 32 - av_log2(buf);
4031 print_bin(buf>>(32-log), log);
4032 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zeros and the terminating one-bit */
4035 LAST_SKIP_BITS(re, gb, log);
4036 CLOSE_READER(re, gb);
/* Returns nonzero if all four sub-macroblock partitions permit an 8x8
 * transform: no 16x8/8x16/8x8 sub-partitioning, and (unless the SPS sets
 * direct_8x8_inference_flag) no direct-mode sub-blocks either.  Each of
 * the four packed 16-bit sub_mb_type entries is tested at once via the
 * 0x0001000100010001 lane-replication mask. */
4041 static inline int get_dct8x8_allowed(H264Context *h){
4042 if(h->sps.direct_8x8_inference_flag)
4043 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4045 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4049 * decodes a residual block.
4050 * @param n block index
4051 * @param scantable scantable
4052 * @param max_coeff number of coefficients in the block
4053 * @return <0 if an error occurred
/**
 * Decodes one CAVLC residual block into 'block'.
 * @param gb        bitstream reader to pull symbols from
 * @param block     output coefficient array (zigzag-ordered via scantable)
 * @param n         block index (CHROMA_DC/LUMA_DC selects special VLC paths)
 * @param scantable scan order mapping coefficient index -> block position
 * @param qmul      dequant table, or NULL for DC blocks (no scaling applied)
 * @param max_coeff number of coefficients in the block
 * @return <0 on bitstream error
 * NOTE(review): several lines are missing from this extract (level[]
 * declaration, some else/closing braces, early-return paths); the visible
 * code is not the complete function body.
 */
4055 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4056 MpegEncContext * const s = &h->s;
/* maps total_coeff of the predicted neighbourhood to one of 4 coeff_token VLC tables */
4057 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4059 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4061 //FIXME put trailing_onex into the context
/* --- coeff_token: encodes (total_coeff, trailing_ones) jointly --- */
4063 if(n == CHROMA_DC_BLOCK_INDEX){
4064 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4065 total_coeff= coeff_token>>2;
4067 if(n == LUMA_DC_BLOCK_INDEX){
/* table choice depends on the predicted non-zero count of neighbours */
4068 total_coeff= pred_non_zero_count(h, 0);
4069 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4070 total_coeff= coeff_token>>2;
4072 total_coeff= pred_non_zero_count(h, n);
4073 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4074 total_coeff= coeff_token>>2;
4075 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4079 //FIXME set last_non_zero?
4083 if(total_coeff > (unsigned)max_coeff) {
4084 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token are the trailing_ones count (0..3) */
4088 trailing_ones= coeff_token&3;
4089 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4090 assert(total_coeff<=16);
/* --- trailing ones: one sign bit each, read branchlessly via show_bits --- */
4092 i = show_bits(gb, 3);
4093 skip_bits(gb, trailing_ones);
4094 level[0] = 1-((i&4)>>1);
4095 level[1] = 1-((i&2) );
4096 level[2] = 1-((i&1)<<1);
/* --- remaining levels: level_prefix + suffix coding --- */
4098 if(trailing_ones<total_coeff) {
4099 int level_code, mask;
4100 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4101 int prefix= get_level_prefix(gb);
4103 //first coefficient has suffix_length equal to 0 or 1
4104 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4106 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4108 level_code= (prefix<<suffix_length); //part
4109 }else if(prefix==14){
4111 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4113 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: escape coding with (prefix-3)-bit suffix */
4115 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4116 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4118 level_code += (1<<(prefix-3))-4096;
4121 if(trailing_ones < 3) level_code += 2;
/* convert unsigned level_code to a signed level: even -> positive, odd -> negative */
4126 mask= -(level_code&1);
4127 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4129 //remaining coefficients have suffix_length > 0
4130 for(i=trailing_ones+1;i<total_coeff;i++) {
/* thresholds at which suffix_length is incremented for the next level */
4131 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4132 prefix = get_level_prefix(gb);
4134 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4136 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4138 level_code += (1<<(prefix-3))-4096;
4140 mask= -(level_code&1);
4141 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4142 if(level_code > suffix_limit[suffix_length])
/* --- total_zeros and run_before: position the levels in the scan --- */
4147 if(total_coeff == max_coeff)
4150 if(n == CHROMA_DC_BLOCK_INDEX)
4151 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4153 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4156 coeff_num = zeros_left + total_coeff - 1;
4157 j = scantable[coeff_num];
/* qmul==NULL path: store raw levels (DC blocks are dequantized elsewhere) */
4159 block[j] = level[0];
4160 for(i=1;i<total_coeff;i++) {
4163 else if(zeros_left < 7){
4164 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4166 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4168 zeros_left -= run_before;
4169 coeff_num -= 1 + run_before;
4170 j= scantable[ coeff_num ];
/* qmul path: dequantize with rounding ( +32 >> 6 ) while storing */
4175 block[j] = (level[0] * qmul[j] + 32)>>6;
4176 for(i=1;i<total_coeff;i++) {
4179 else if(zeros_left < 7){
4180 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4182 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4184 zeros_left -= run_before;
4185 coeff_num -= 1 + run_before;
4186 j= scantable[ coeff_num ];
4188 block[j]= (level[i] * qmul[j] + 32)>>6;
4193 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair from
 * the left neighbour if it is in the same slice, otherwise from the top
 * neighbour (the fallback value when neither matches is on a line missing
 * from this extract).
 */
4200 static void predict_field_decoding_flag(H264Context *h){
4201 MpegEncContext * const s = &h->s;
4202 const int mb_xy= h->mb_xy;
4203 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4204 ? s->current_picture.mb_type[mb_xy-1]
4205 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4206 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4208 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4212 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: clears the coefficient caches,
 * derives the motion (direct prediction for B, P-skip prediction for P)
 * and writes the resulting mb_type/motion back to the current picture.
 * NOTE(review): the mb_type initialization and some branch lines are
 * missing from this extract.
 */
4214 static void decode_mb_skip(H264Context *h){
4215 MpegEncContext * const s = &h->s;
4216 const int mb_xy= h->mb_xy;
/* skipped MBs carry no residual: zero the non-zero-count state */
4219 memset(h->non_zero_count[mb_xy], 0, 16);
4220 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4223 mb_type|= MB_TYPE_INTERLACED;
4225 if( h->slice_type_nos == FF_B_TYPE )
4227 // just for fill_caches. pred_direct_motion will set the real mb_type
4228 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4230 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4231 pred_direct_motion(h, &mb_type);
4232 mb_type|= MB_TYPE_SKIP;
/* P-skip: predicted 16x16 motion with reference index 0 in list 0 */
4237 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4239 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4240 pred_pskip_motion(h, &mx, &my);
4241 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4242 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4245 write_back_motion(h, mb_type);
4246 s->current_picture.mb_type[mb_xy]= mb_type;
4247 s->current_picture.qscale_table[mb_xy]= s->qscale;
4248 h->slice_table[ mb_xy ]= h->slice_num;
/* remembered so the next MB can suppress the redundant dquant read */
4249 h->prev_mb_skipped= 1;
4253 * decodes a macroblock
4254 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock with CAVLC entropy coding.
 * Handles skip runs, mb_type parsing per slice type, intra prediction
 * modes, inter motion/reference parsing for all partition shapes, CBP,
 * dquant, and the residual blocks (luma 4x4/8x8 and chroma DC/AC).
 * @return 0 on success, a negative error on bitstream corruption
 * NOTE(review): many lines (declarations, else branches, error returns,
 * closing braces) are missing from this extract; treat the visible code
 * as a skeleton of the full function.
 */
4256 static int decode_mb_cavlc(H264Context *h){
4257 MpegEncContext * const s = &h->s;
4259 int partition_count;
4260 unsigned int mb_type, cbp;
4261 int dct8x8_allowed= h->pps.transform_8x8_mode;
4263 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4265 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4267 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4268 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --- */
4270 if(h->slice_type_nos != FF_I_TYPE){
4271 if(s->mb_skip_run==-1)
4272 s->mb_skip_run= get_ue_golomb(&s->gb);
4274 if (s->mb_skip_run--) {
4275 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4276 if(s->mb_skip_run==0)
4277 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4279 predict_field_decoding_flag(h);
4286 if( (s->mb_y&1) == 0 )
4287 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4290 h->prev_mb_skipped= 0;
/* --- mb_type: meaning depends on slice type; B/P tables offset into the I table --- */
4292 mb_type= get_ue_golomb(&s->gb);
4293 if(h->slice_type_nos == FF_B_TYPE){
4295 partition_count= b_mb_type_info[mb_type].partition_count;
4296 mb_type= b_mb_type_info[mb_type].type;
4299 goto decode_intra_mb;
4301 }else if(h->slice_type_nos == FF_P_TYPE){
4303 partition_count= p_mb_type_info[mb_type].partition_count;
4304 mb_type= p_mb_type_info[mb_type].type;
4307 goto decode_intra_mb;
4310 assert(h->slice_type_nos == FF_I_TYPE);
4311 if(h->slice_type == FF_SI_TYPE && mb_type)
4315 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4319 cbp= i_mb_type_info[mb_type].cbp;
4320 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4321 mb_type= i_mb_type_info[mb_type].type;
4325 mb_type |= MB_TYPE_INTERLACED;
4327 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples, bypasses prediction and residual decoding --- */
4329 if(IS_INTRA_PCM(mb_type)){
4332 // We assume these blocks are very rare so we do not optimize it.
4333 align_get_bits(&s->gb);
4335 // The pixels are stored in the same order as levels in h->mb array.
4336 for(x=0; x < (CHROMA ? 384 : 256); x++){
4337 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4340 // In deblocking, the quantizer is 0
4341 s->current_picture.qscale_table[mb_xy]= 0;
4342 // All coeffs are present
4343 memset(h->non_zero_count[mb_xy], 16, 16);
4345 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs address references per-field: double the ref counts */
4350 h->ref_count[0] <<= 1;
4351 h->ref_count[1] <<= 1;
4354 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4357 if(IS_INTRA(mb_type)){
4359 // init_top_left_availability(h);
4360 if(IS_INTRA4x4(mb_type)){
4363 if(dct8x8_allowed && get_bits1(&s->gb)){
4364 mb_type |= MB_TYPE_8x8DCT;
4368 // fill_intra4x4_pred_table(h);
4369 for(i=0; i<16; i+=di){
4370 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag == 0: read the 3-bit remaining mode */
4372 if(!get_bits1(&s->gb)){
4373 const int rem_mode= get_bits(&s->gb, 3);
4374 mode = rem_mode + (rem_mode >= mode);
4378 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4380 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4382 write_back_intra_pred_mode(h);
4383 if( check_intra4x4_pred_mode(h) < 0)
4386 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4387 if(h->intra16x16_pred_mode < 0)
4391 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4394 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partitions: read sub_mb_type, refs and MVs per sub-block --- */
4396 }else if(partition_count==4){
4397 int i, j, sub_partition_count[4], list, ref[2][4];
4399 if(h->slice_type_nos == FF_B_TYPE){
4401 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4402 if(h->sub_mb_type[i] >=13){
4403 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4406 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4407 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4409 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4410 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4411 pred_direct_motion(h, &mb_type);
/* direct sub-blocks: mark interior cache entries unavailable for prediction */
4412 h->ref_cache[0][scan8[4]] =
4413 h->ref_cache[1][scan8[4]] =
4414 h->ref_cache[0][scan8[12]] =
4415 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4418 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4420 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4421 if(h->sub_mb_type[i] >=4){
4422 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4425 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4426 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4430 for(list=0; list<h->list_count; list++){
4431 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4433 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4434 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4435 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4437 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4449 dct8x8_allowed = get_dct8x8_allowed(h);
4451 for(list=0; list<h->list_count; list++){
4453 if(IS_DIRECT(h->sub_mb_type[i])) {
4454 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4457 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4458 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4460 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4461 const int sub_mb_type= h->sub_mb_type[i];
4462 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4463 for(j=0; j<sub_partition_count[i]; j++){
4465 const int index= 4*i + block_width*j;
4466 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4467 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4468 mx += get_se_golomb(&s->gb);
4469 my += get_se_golomb(&s->gb);
4470 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into all 4x4 cache cells covered by the sub-partition */
4472 if(IS_SUB_8X8(sub_mb_type)){
4474 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4476 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4477 }else if(IS_SUB_8X4(sub_mb_type)){
4478 mv_cache[ 1 ][0]= mx;
4479 mv_cache[ 1 ][1]= my;
4480 }else if(IS_SUB_4X8(sub_mb_type)){
4481 mv_cache[ 8 ][0]= mx;
4482 mv_cache[ 8 ][1]= my;
4484 mv_cache[ 0 ][0]= mx;
4485 mv_cache[ 0 ][1]= my;
4488 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4494 }else if(IS_DIRECT(mb_type)){
4495 pred_direct_motion(h, &mb_type);
4496 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions: refs first, then MVs --- */
4498 int list, mx, my, i;
4499 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4500 if(IS_16X16(mb_type)){
4501 for(list=0; list<h->list_count; list++){
4503 if(IS_DIR(mb_type, 0, list)){
4504 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4505 if(val >= h->ref_count[list]){
4506 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4510 val= LIST_NOT_USED&0xFF;
4511 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4513 for(list=0; list<h->list_count; list++){
4515 if(IS_DIR(mb_type, 0, list)){
4516 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4517 mx += get_se_golomb(&s->gb);
4518 my += get_se_golomb(&s->gb);
4519 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4521 val= pack16to32(mx,my);
4524 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4527 else if(IS_16X8(mb_type)){
4528 for(list=0; list<h->list_count; list++){
4531 if(IS_DIR(mb_type, i, list)){
4532 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4533 if(val >= h->ref_count[list]){
4534 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4538 val= LIST_NOT_USED&0xFF;
4539 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4542 for(list=0; list<h->list_count; list++){
4545 if(IS_DIR(mb_type, i, list)){
4546 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4547 mx += get_se_golomb(&s->gb);
4548 my += get_se_golomb(&s->gb);
4549 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4551 val= pack16to32(mx,my);
4554 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4558 assert(IS_8X16(mb_type));
4559 for(list=0; list<h->list_count; list++){
4562 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4563 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4564 if(val >= h->ref_count[list]){
4565 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4569 val= LIST_NOT_USED&0xFF;
4570 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4573 for(list=0; list<h->list_count; list++){
4576 if(IS_DIR(mb_type, i, list)){
4577 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4578 mx += get_se_golomb(&s->gb);
4579 my += get_se_golomb(&s->gb);
4580 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4582 val= pack16to32(mx,my);
4585 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4591 if(IS_INTER(mb_type))
4592 write_back_motion(h, mb_type);
/* --- coded block pattern (I16x16 carries its CBP inside mb_type) --- */
4594 if(!IS_INTRA16x16(mb_type)){
4595 cbp= get_ue_golomb(&s->gb);
4597 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4602 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4603 else cbp= golomb_to_inter_cbp [cbp];
4605 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4606 else cbp= golomb_to_inter_cbp_gray[cbp];
4611 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4612 if(get_bits1(&s->gb)){
4613 mb_type |= MB_TYPE_8x8DCT;
4614 h->cbp_table[mb_xy]= cbp;
4617 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
4619 if(cbp || IS_INTRA16x16(mb_type)){
4620 int i8x8, i4x4, chroma_idx;
4622 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4623 const uint8_t *scan, *scan8x8, *dc_scan;
4625 // fill_non_zero_count_cache(h);
4627 if(IS_INTERLACED(mb_type)){
4628 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4629 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4630 dc_scan= luma_dc_field_scan;
4632 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4633 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4634 dc_scan= luma_dc_zigzag_scan;
4637 dquant= get_se_golomb(&s->gb);
4639 if( dquant > 25 || dquant < -26 ){
4640 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's QP update rule */
4644 s->qscale += dquant;
4645 if(((unsigned)s->qscale) > 51){
4646 if(s->qscale<0) s->qscale+= 52;
4647 else s->qscale-= 52;
4650 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4651 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4652 if(IS_INTRA16x16(mb_type)){
4653 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4654 return -1; //FIXME continue if partitioned and other return -1 too
4657 assert((cbp&15) == 0 || (cbp&15) == 15);
4660 for(i8x8=0; i8x8<4; i8x8++){
4661 for(i4x4=0; i4x4<4; i4x4++){
4662 const int index= i4x4 + 4*i8x8;
/* I16x16 AC blocks: 15 coeffs, DC is decoded separately above */
4663 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4669 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4672 for(i8x8=0; i8x8<4; i8x8++){
4673 if(cbp & (1<<i8x8)){
4674 if(IS_8x8DCT(mb_type)){
4675 DCTELEM *buf = &h->mb[64*i8x8];
4677 for(i4x4=0; i4x4<4; i4x4++){
4678 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4679 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4682 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4683 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4685 for(i4x4=0; i4x4<4; i4x4++){
4686 const int index= i4x4 + 4*i8x8;
4688 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4694 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4695 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4701 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4702 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4708 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4709 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4710 for(i4x4=0; i4x4<4; i4x4++){
4711 const int index= 16 + 4*chroma_idx + i4x4;
4712 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4718 uint8_t * const nnz= &h->non_zero_count_cache[0];
4719 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4720 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4723 uint8_t * const nnz= &h->non_zero_count_cache[0];
4724 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4725 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4726 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4728 s->current_picture.qscale_table[mb_xy]= s->qscale;
4729 write_back_non_zero_count(h);
/* undo the MBAFF per-field doubling of the reference counts */
4732 h->ref_count[0] >>= 1;
4733 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC. The context (0..2, increments
 * are on lines missing from this extract) counts how many of the left and
 * top neighbour MB pairs in the same slice are field-coded.
 */
4739 static int decode_cabac_field_decoding_flag(H264Context *h) {
4740 MpegEncContext * const s = &h->s;
4741 const int mb_x = s->mb_x;
/* address the top MB of the current MB pair */
4742 const int mb_y = s->mb_y & ~1;
4743 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4744 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4746 unsigned int ctx = 0;
4748 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4751 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4755 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    base index into cabac_state for this slice type
 * @param intra_slice nonzero in I slices (uses neighbour-dependent context)
 * @return 0 for I4x4, 1..24 for the I16x16 variants, 25 for I_PCM
 */
4758 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4759 uint8_t *state= &h->cabac_state[ctx_base];
4763 MpegEncContext * const s = &h->s;
4764 const int mba_xy = h->left_mb_xy[0];
4765 const int mbb_xy = h->top_mb_xy;
/* ctx grows with each non-I4x4 neighbour in the same slice */
4767 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4769 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4771 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4772 return 0; /* I4x4 */
4775 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4776 return 0; /* I4x4 */
4779 if( get_cabac_terminate( &h->cabac ) )
4780 return 25; /* PCM */
/* I16x16: compose the type index from cbp_luma, cbp_chroma and pred mode bits */
4782 mb_type = 1; /* I16x16 */
4783 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4784 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4785 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4786 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4787 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * I slices delegate to decode_cabac_intra_mb_type; P and B slices decode
 * their own binarization trees, falling through to the intra decoder for
 * intra-in-inter macroblocks.
 */
4791 static int decode_cabac_mb_type( H264Context *h ) {
4792 MpegEncContext * const s = &h->s;
4794 if( h->slice_type_nos == FF_I_TYPE ) {
4795 return decode_cabac_intra_mb_type(h, 3, 1);
4796 } else if( h->slice_type_nos == FF_P_TYPE ) {
4797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4800 /* P_L0_D16x16, P_8x8 */
4801 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4803 /* P_L0_D8x16, P_L0_D16x8 */
4804 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra MB in a P slice: P mb_types occupy 0..4, intra starts at 5 */
4807 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4810 const int mba_xy = h->left_mb_xy[0];
4811 const int mbb_xy = h->top_mb_xy;
4814 assert(h->slice_type_nos == FF_B_TYPE);
/* ctx counts non-direct neighbours in the same slice (increments missing from extract) */
4816 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4818 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4821 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4822 return 0; /* B_Direct_16x16 */
4824 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4825 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining bi/mixed partition types */
4828 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4829 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4830 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4831 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4833 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4834 else if( bits == 13 ) {
/* intra MB in a B slice: B mb_types occupy 0..22, intra starts at 23 */
4835 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4836 } else if( bits == 14 )
4837 return 11; /* B_L1_L0_8x16 */
4838 else if( bits == 15 )
4839 return 22; /* B_8x8 */
4841 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4842 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * Decodes mb_skip_flag with CABAC. The context (0..2) counts the left/top
 * neighbours in the same slice that are NOT skipped; B slices use a
 * separate context set (offset visible only partially in this extract).
 * In MBAFF frames the neighbour addresses are adjusted so that frame and
 * field MB pairs reference comparable neighbours.
 */
4846 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4847 MpegEncContext * const s = &h->s;
4851 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4852 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4855 && h->slice_table[mba_xy] == h->slice_num
4856 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4857 mba_xy += s->mb_stride;
4859 mbb_xy = mb_xy - s->mb_stride;
4861 && h->slice_table[mbb_xy] == h->slice_num
4862 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4863 mbb_xy -= s->mb_stride;
4865 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4867 int mb_xy = h->mb_xy;
/* FIELD_PICTURE doubles the stride so "above" skips the opposite-parity row */
4869 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4872 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4874 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4877 if( h->slice_type_nos == FF_B_TYPE )
4879 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes a 4x4 intra prediction mode with CABAC.
 * If the first bin is set the predicted mode is used (early return is on a
 * line missing from this extract); otherwise a 3-bit remaining mode is
 * read and remapped so it never collides with the predicted mode.
 */
4882 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4885 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4888 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4889 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4890 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode: rem_mode >= pred selects rem_mode+1 */
4892 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC via a truncated-unary
 * binarization: first bin uses a neighbour-derived context (0..2),
 * remaining bins share context 64+3.
 */
4898 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4899 const int mba_xy = h->left_mb_xy[0];
4900 const int mbb_xy = h->top_mb_xy;
4904 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4905 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4908 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4911 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4914 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4916 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC, one bin per 8x8
 * block. Each bin's context depends on whether the corresponding left and
 * top neighbouring 8x8 blocks were coded (-1 cbp for out-of-slice
 * neighbours makes the !(cbp & ...) tests treat them as coded).
 * NOTE(review): the final "return cbp;" is on a line missing from this
 * extract.
 */
4922 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4923 int cbp_b, cbp_a, ctx, cbp = 0;
4925 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4926 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4928 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4929 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4930 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4931 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4932 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4933 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4934 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4935 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern with CABAC.
 * @return 0 (no chroma coeffs), 1 (DC only) or 2 (DC+AC); contexts derive
 * from the neighbours' chroma cbp (bits 4-5 of their stored cbp).
 */
4938 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4942 cbp_a = (h->left_cbp>>4)&0x03;
4943 cbp_b = (h-> top_cbp>>4)&0x03;
4946 if( cbp_a > 0 ) ctx++;
4947 if( cbp_b > 0 ) ctx += 2;
4948 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* second bin distinguishes DC-only from DC+AC; ctx from neighbours == 2 */
4952 if( cbp_a == 2 ) ctx++;
4953 if( cbp_b == 2 ) ctx += 2;
4954 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: unary-coded magnitude, then mapped to a
 * signed delta (even val -> positive, odd -> negative). The first bin's
 * context depends on whether the previous MB had a nonzero delta.
 */
4956 static int decode_cabac_mb_dqp( H264Context *h) {
4957 int ctx= h->last_qscale_diff != 0;
4960 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4963 if(val > 102) //prevent infinite loop
4968 return (val + 1)>>1 ;
4970 return -((val + 1)>>1);
/**
 * Decodes a P-slice sub_mb_type (0..3) with CABAC using the 3-bin tree
 * over contexts 21-23 (the leaf return values between bins are on lines
 * missing from this extract).
 */
4972 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4973 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4975 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4977 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (0..12) with CABAC: direct/8x8 shortcuts
 * first, then a small binary tree over contexts 38-39 for the remaining
 * partition shapes.
 */
4981 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4983 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4984 return 0; /* B_Direct_8x8 */
4985 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4986 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4988 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4989 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4990 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4993 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4994 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; the context is the count of
 * neighbouring MBs using the 8x8 transform (precomputed in
 * h->neighbor_transform_size).
 */
4998 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4999 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC (unary binarization over contexts
 * 54+ctx). In B slices, direct-predicted neighbours do not contribute to
 * the context. Refs >= 32 are treated as a bitstream error (error path
 * lines missing from this extract).
 */
5002 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5003 int refa = h->ref_cache[list][scan8[n] - 1];
5004 int refb = h->ref_cache[list][scan8[n] - 8];
5008 if( h->slice_type_nos == FF_B_TYPE) {
5009 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5011 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5020 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
/* sanity bound: more than 32 refs cannot be valid */
5026 if(ref >= 32 /*h->ref_list[list]*/){
/**
 * Decodes one motion vector difference component with CABAC.
 * @param l 0 = horizontal (ctx base 40), 1 = vertical (ctx base 47)
 * Context for the first bin depends on the neighbours' absolute mvd sum;
 * magnitudes >= 9 switch to exp-golomb-style bypass coding, and the sign
 * is a final bypass bin.
 */
5033 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5034 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5035 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5036 int ctxbase = (l == 0) ? 40 : 47;
5038 int ctx = (amvd>2) + (amvd>32);
5040 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary part: up to 8 more context-coded bins */
5045 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* escape: exponential prefix in bypass bins */
5053 while( get_cabac_bypass( &h->cabac ) ) {
5057 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5062 if( get_cabac_bypass( &h->cabac ) )
5066 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag CABAC context for block category 'cat' and
 * block index 'idx': ctx = nza + 2*nzb + 4*cat, where nza/nzb indicate
 * whether the left/top neighbouring block of the same category is coded.
 * DC flags live in the cbp side-bits; AC/luma flags come from the
 * non_zero_count cache.
 */
5069 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5075 nza = h->left_cbp&0x100;
5076 nzb = h-> top_cbp&0x100;
/* chroma DC: per-component coded bit stored at cbp bits 6..7 */
5078 nza = (h->left_cbp>>(6+idx))&0x01;
5079 nzb = (h-> top_cbp>>(6+idx))&0x01;
5082 assert(cat == 1 || cat == 2 || cat == 4);
5083 nza = h->non_zero_count_cache[scan8[idx] - 1];
5084 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5093 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * CABAC last_significant_coeff_flag of 8x8 blocks. */
5096 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5097 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5098 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5099 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5100 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes one CABAC residual block: coded_block_flag, significance map,
 * then coefficient levels and signs (in reverse scan order).
 * @param cat       block category (see table in the body comment below)
 * @param qmul      dequant table or NULL (DC path stores raw levels)
 * @param max_coeff number of coefficients in the block
 * @param is_dc     compile-time split so DC and AC paths specialize
 * NOTE(review): several lines (locals, some index bookkeeping, loop
 * boundaries) are missing from this extract.
 */
5103 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* context base offsets per category; [0]=frame, [1]=field coding */
5104 static const int significant_coeff_flag_offset[2][6] = {
5105 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5106 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5108 static const int last_coeff_flag_offset[2][6] = {
5109 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5110 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5112 static const int coeff_abs_level_m1_offset[6] = {
5113 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position context offsets for 8x8 significance ([0]=frame, [1]=field) */
5115 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5116 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5117 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5118 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5119 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5120 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5121 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5122 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5123 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5125 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5126 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5127 * map node ctx => cabac ctx for level=1 */
5128 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5129 /* map node ctx => cabac ctx for level>1 */
5130 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5131 static const uint8_t coeff_abs_level_transition[2][8] = {
5132 /* update node ctx after decoding a level=1 */
5133 { 1, 2, 3, 3, 4, 5, 6, 7 },
5134 /* update node ctx after decoding a level>1 */
5135 { 4, 4, 4, 4, 5, 6, 7, 7 }
5141 int coeff_count = 0;
5144 uint8_t *significant_coeff_ctx_base;
5145 uint8_t *last_coeff_ctx_base;
5146 uint8_t *abs_level_m1_ctx_base;
/* work on a stack copy of the CABAC state so the compiler can keep it in registers */
5149 #define CABAC_ON_STACK
5151 #ifdef CABAC_ON_STACK
5154 cc.range = h->cabac.range;
5155 cc.low = h->cabac.low;
5156 cc.bytestream= h->cabac.bytestream;
5158 #define CC &h->cabac
5162 /* cat: 0-> DC 16x16 n = 0
5163 * 1-> AC 16x16 n = luma4x4idx
5164 * 2-> Luma4x4 n = luma4x4idx
5165 * 3-> DC Chroma n = iCbCr
5166 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5167 * 5-> Luma8x8 n = 4 * luma8x8idx
5170 /* read coded block flag */
5171 if( is_dc || cat != 5 ) {
5172 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5174 h->non_zero_count_cache[scan8[n]] = 0;
5176 #ifdef CABAC_ON_STACK
/* flush the stack copy back before the early return */
5177 h->cabac.range = cc.range ;
5178 h->cabac.low = cc.low ;
5179 h->cabac.bytestream= cc.bytestream;
5185 significant_coeff_ctx_base = h->cabac_state
5186 + significant_coeff_flag_offset[MB_FIELD][cat];
5187 last_coeff_ctx_base = h->cabac_state
5188 + last_coeff_flag_offset[MB_FIELD][cat];
5189 abs_level_m1_ctx_base = h->cabac_state
5190 + coeff_abs_level_m1_offset[cat];
/* --- significance map: which scan positions carry a coefficient --- */
5192 if( !is_dc && cat == 5 ) {
5193 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5194 for(last= 0; last < coefs; last++) { \
5195 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5196 if( get_cabac( CC, sig_ctx )) { \
5197 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5198 index[coeff_count++] = last; \
5199 if( get_cabac( CC, last_ctx ) ) { \
5205 if( last == max_coeff -1 ) {\
5206 index[coeff_count++] = last;\
5208 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5209 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5210 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5212 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5214 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5216 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5219 assert(coeff_count > 0);
/* record coded status: DC in cbp side-bits, AC in the nnz caches */
5223 h->cbp_table[h->mb_xy] |= 0x100;
5225 h->cbp_table[h->mb_xy] |= 0x40 << n;
5228 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5230 assert( cat == 1 || cat == 2 || cat == 4 );
5231 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* --- levels: decoded last-to-first so the node context adapts correctly --- */
5236 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5238 int j= scantable[index[--coeff_count]];
5240 if( get_cabac( CC, ctx ) == 0 ) {
5241 node_ctx = coeff_abs_level_transition[0][node_ctx];
5243 block[j] = get_cabac_bypass_sign( CC, -1);
5245 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5249 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5250 node_ctx = coeff_abs_level_transition[1][node_ctx];
5252 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5256 if( coeff_abs >= 15 ) {
/* magnitude escape: exp-golomb in bypass bins */
5258 while( get_cabac_bypass( CC ) ) {
5264 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5270 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5272 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5275 } while( coeff_count );
5276 #ifdef CABAC_ON_STACK
5277 h->cabac.range = cc.range ;
5278 h->cabac.low = cc.low ;
5279 h->cabac.bytestream= cc.bytestream;
5284 #ifndef CONFIG_SMALL
/* Non-CONFIG_SMALL build: DC-only wrapper. Passing the constant is_dc=1 lets
 * the compiler specialize decode_cabac_residual_internal for the DC path.
 * NOTE(review): the closing brace is among the lines missing from this excerpt. */
5285 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5286 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-CONFIG_SMALL build: AC/non-DC wrapper. Constant is_dc=0 specializes
 * decode_cabac_residual_internal for the non-DC path. */
5289 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5290 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Residual-decoding dispatcher: categories 0 (luma DC) and 3 (chroma DC) take
 * the DC path, all other categories the non-DC path.
 * NOTE(review): the CONFIG_SMALL #if/#else/#endif lines separating the single
 * generic call from the dc/nondc pair are not visible in this excerpt. */
5294 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5296 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5298 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5299 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The default frame-coded neighbors (one stride up, one mb left) are adjusted
 * below for MBAFF macroblock pairs and for field pictures.
 * NOTE(review): several lines (the MBAFF guard, closing braces) are missing
 * from this excerpt. */
5303 static inline void compute_mb_neighbors(H264Context *h)
5305 MpegEncContext * const s = &h->s;
5306 const int mb_xy = h->mb_xy;
5307 h->top_mb_xy = mb_xy - s->mb_stride;
5308 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbors are located per macroblock *pair*; pair_xy is the top mb
 * of the current pair, top_pair_xy the pair directly above it. */
5310 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5311 const int top_pair_xy = pair_xy - s->mb_stride;
5312 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5313 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5314 const int curr_mb_frame_flag = !MB_FIELD;
5315 const int bottom = (s->mb_y & 1);
5317 ? !curr_mb_frame_flag // bottom macroblock
5318 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5320 h->top_mb_xy -= s->mb_stride;
/* Left neighbor: when left pair and current mb differ in frame/field coding,
 * point at the top mb of the left pair. */
5322 if (left_mb_frame_flag != curr_mb_frame_flag) {
5323 h->left_mb_xy[0] = pair_xy - 1;
5325 } else if (FIELD_PICTURE) {
5326 h->top_mb_xy -= s->mb_stride;
5332 * decodes a macroblock
5333 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5335 static int decode_mb_cabac(H264Context *h) {
5336 MpegEncContext * const s = &h->s;
5338 int mb_type, partition_count, cbp = 0;
5339 int dct8x8_allowed= h->pps.transform_8x8_mode;
5341 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5343 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5345 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling: only P/B slices can have skipped macroblocks --- */
5346 if( h->slice_type_nos != FF_I_TYPE ) {
5348 /* a skipped mb needs the aff flag from the following mb */
5349 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5350 predict_field_decoding_flag(h);
5351 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5352 skip = h->next_mb_skipped;
5354 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5355 /* read skip flags */
5357 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5358 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Top mb of an MBAFF pair skipped: read the bottom mb's skip flag ahead of
 * time, since the field decoding flag is coded only after both skips. */
5359 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5360 if(h->next_mb_skipped)
5361 predict_field_decoding_flag(h);
5363 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped mb: reset per-mb state used by deblocking/context derivation. */
5368 h->cbp_table[mb_xy] = 0;
5369 h->chroma_pred_mode_table[mb_xy] = 0;
5370 h->last_qscale_diff = 0;
5377 if( (s->mb_y&1) == 0 )
5379 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5382 h->prev_mb_skipped = 0;
/* --- macroblock type --- */
5384 compute_mb_neighbors(h);
5385 mb_type = decode_cabac_mb_type( h );
5386 assert(mb_type >= 0);
/* Map the raw CABAC mb_type index through the per-slice-type tables; intra
 * types in P/B slices are re-based and handled by the shared intra path. */
5388 if( h->slice_type_nos == FF_B_TYPE ) {
5390 partition_count= b_mb_type_info[mb_type].partition_count;
5391 mb_type= b_mb_type_info[mb_type].type;
5394 goto decode_intra_mb;
5396 } else if( h->slice_type_nos == FF_P_TYPE ) {
5398 partition_count= p_mb_type_info[mb_type].partition_count;
5399 mb_type= p_mb_type_info[mb_type].type;
5402 goto decode_intra_mb;
5405 if(h->slice_type == FF_SI_TYPE && mb_type)
5407 assert(h->slice_type_nos == FF_I_TYPE);
5409 partition_count = 0;
5410 cbp= i_mb_type_info[mb_type].cbp;
5411 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5412 mb_type= i_mb_type_info[mb_type].type;
5415 mb_type |= MB_TYPE_INTERLACED;
5417 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow in the bitstream, bypassing CABAC --- */
5419 if(IS_INTRA_PCM(mb_type)) {
5422 // We assume these blocks are very rare so we do not optimize it.
5423 // FIXME The two following lines get the bitstream position in the cabac
5424 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5425 ptr= h->cabac.bytestream;
5426 if(h->cabac.low&0x1) ptr--;
5428 if(h->cabac.low&0x1FF) ptr--;
5431 // The pixels are stored in the same order as levels in h->mb array.
5432 memcpy(h->mb, ptr, 256); ptr+=256;
5434 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC decoder after the raw PCM bytes. */
5437 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5439 // All blocks are present
5440 h->cbp_table[mb_xy] = 0x1ef;
5441 h->chroma_pred_mode_table[mb_xy] = 0;
5442 // In deblocking, the quantizer is 0
5443 s->current_picture.qscale_table[mb_xy]= 0;
5444 // All coeffs are present
5445 memset(h->non_zero_count[mb_xy], 16, 16);
5446 s->current_picture.mb_type[mb_xy]= mb_type;
5447 h->last_qscale_diff = 0;
/* MBAFF field mb: ref lists are doubled (each field of a reference counts);
 * shifted back near the end of the function. */
5452 h->ref_count[0] <<= 1;
5453 h->ref_count[1] <<= 1;
5456 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5458 if( IS_INTRA( mb_type ) ) {
5460 if( IS_INTRA4x4( mb_type ) ) {
5461 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5462 mb_type |= MB_TYPE_8x8DCT;
5463 for( i = 0; i < 16; i+=4 ) {
5464 int pred = pred_intra_mode( h, i );
5465 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5466 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5469 for( i = 0; i < 16; i++ ) {
5470 int pred = pred_intra_mode( h, i );
5471 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5473 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5476 write_back_intra_pred_mode(h);
5477 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5479 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5480 if( h->intra16x16_pred_mode < 0 ) return -1;
5483 h->chroma_pred_mode_table[mb_xy] =
5484 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5486 pred_mode= check_intra_pred_mode( h, pred_mode );
5487 if( pred_mode < 0 ) return -1;
5488 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions: sub-mb types, refs, and motion vectors --- */
5490 } else if( partition_count == 4 ) {
5491 int i, j, sub_partition_count[4], list, ref[2][4];
5493 if( h->slice_type_nos == FF_B_TYPE ) {
5494 for( i = 0; i < 4; i++ ) {
5495 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5496 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5497 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5499 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5500 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5501 pred_direct_motion(h, &mb_type);
5502 h->ref_cache[0][scan8[4]] =
5503 h->ref_cache[1][scan8[4]] =
5504 h->ref_cache[0][scan8[12]] =
5505 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5506 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5507 for( i = 0; i < 4; i++ )
5508 if( IS_DIRECT(h->sub_mb_type[i]) )
5509 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5513 for( i = 0; i < 4; i++ ) {
5514 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5515 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5516 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per list per 8x8 block (skipped for direct blocks). */
5520 for( list = 0; list < h->list_count; list++ ) {
5521 for( i = 0; i < 4; i++ ) {
5522 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5523 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5524 if( h->ref_count[list] > 1 ){
5525 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5526 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5527 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5535 h->ref_cache[list][ scan8[4*i]+1 ]=
5536 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5541 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vector differences; mv/mvd caches are filled per sub-partition
 * shape (8x8, 8x4, 4x8, 4x4). */
5543 for(list=0; list<h->list_count; list++){
5545 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5546 if(IS_DIRECT(h->sub_mb_type[i])){
5547 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5551 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5552 const int sub_mb_type= h->sub_mb_type[i];
5553 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5554 for(j=0; j<sub_partition_count[i]; j++){
5557 const int index= 4*i + block_width*j;
5558 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5559 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5560 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5562 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5563 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5564 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5566 if(IS_SUB_8X8(sub_mb_type)){
5568 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5570 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5573 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5575 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5576 }else if(IS_SUB_8X4(sub_mb_type)){
5577 mv_cache[ 1 ][0]= mx;
5578 mv_cache[ 1 ][1]= my;
5580 mvd_cache[ 1 ][0]= mx - mpx;
5581 mvd_cache[ 1 ][1]= my - mpy;
5582 }else if(IS_SUB_4X8(sub_mb_type)){
5583 mv_cache[ 8 ][0]= mx;
5584 mv_cache[ 8 ][1]= my;
5586 mvd_cache[ 8 ][0]= mx - mpx;
5587 mvd_cache[ 8 ][1]= my - mpy;
5589 mv_cache[ 0 ][0]= mx;
5590 mv_cache[ 0 ][1]= my;
5592 mvd_cache[ 0 ][0]= mx - mpx;
5593 mvd_cache[ 0 ][1]= my - mpy;
/* Unused list for this 8x8 block: clear a 2x2 region of mv/mvd as 32-bit
 * stores (two int16 components per write). */
5596 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5597 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5598 p[0] = p[1] = p[8] = p[9] = 0;
5599 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- inter, B direct 16x16 --- */
5603 } else if( IS_DIRECT(mb_type) ) {
5604 pred_direct_motion(h, &mb_type);
5605 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5606 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5607 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
5609 int list, mx, my, i, mpx, mpy;
5610 if(IS_16X16(mb_type)){
5611 for(list=0; list<h->list_count; list++){
5612 if(IS_DIR(mb_type, 0, list)){
5614 if(h->ref_count[list] > 1){
5615 ref= decode_cabac_mb_ref(h, list, 0);
5616 if(ref >= (unsigned)h->ref_count[list]){
5617 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5622 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5624 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5626 for(list=0; list<h->list_count; list++){
5627 if(IS_DIR(mb_type, 0, list)){
5628 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5630 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5631 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5632 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5634 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5635 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5637 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5640 else if(IS_16X8(mb_type)){
5641 for(list=0; list<h->list_count; list++){
5643 if(IS_DIR(mb_type, i, list)){
5645 if(h->ref_count[list] > 1){
5646 ref= decode_cabac_mb_ref( h, list, 8*i );
5647 if(ref >= (unsigned)h->ref_count[list]){
5648 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5653 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5655 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5658 for(list=0; list<h->list_count; list++){
5660 if(IS_DIR(mb_type, i, list)){
5661 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5662 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5663 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5664 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5666 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5667 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5669 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5670 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5675 assert(IS_8X16(mb_type));
5676 for(list=0; list<h->list_count; list++){
5678 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5680 if(h->ref_count[list] > 1){
5681 ref= decode_cabac_mb_ref( h, list, 4*i );
5682 if(ref >= (unsigned)h->ref_count[list]){
5683 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5688 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5690 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5693 for(list=0; list<h->list_count; list++){
5695 if(IS_DIR(mb_type, i, list)){
5696 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5697 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5698 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5700 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5701 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5702 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5704 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5705 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5712 if( IS_INTER( mb_type ) ) {
5713 h->chroma_pred_mode_table[mb_xy] = 0;
5714 write_back_motion( h, mb_type );
/* --- coded block pattern (explicit except for intra16x16) --- */
5717 if( !IS_INTRA16x16( mb_type ) ) {
5718 cbp = decode_cabac_mb_cbp_luma( h );
5720 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5723 h->cbp_table[mb_xy] = h->cbp = cbp;
5725 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5726 if( decode_cabac_mb_transform_size( h ) )
5727 mb_type |= MB_TYPE_8x8DCT;
5729 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual coefficients --- */
5731 if( cbp || IS_INTRA16x16( mb_type ) ) {
5732 const uint8_t *scan, *scan8x8, *dc_scan;
5733 const uint32_t *qmul;
5736 if(IS_INTERLACED(mb_type)){
5737 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5738 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5739 dc_scan= luma_dc_field_scan;
5741 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5742 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5743 dc_scan= luma_dc_zigzag_scan;
5746 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5747 if( dqp == INT_MIN ){
5748 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec's mb_qp_delta semantics. */
5752 if(((unsigned)s->qscale) > 51){
5753 if(s->qscale<0) s->qscale+= 52;
5754 else s->qscale-= 52;
5756 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5757 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5759 if( IS_INTRA16x16( mb_type ) ) {
5761 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5762 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5765 qmul = h->dequant4_coeff[0][s->qscale];
5766 for( i = 0; i < 16; i++ ) {
5767 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5768 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5771 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5775 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5776 if( cbp & (1<<i8x8) ) {
5777 if( IS_8x8DCT(mb_type) ) {
5778 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5779 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5781 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5782 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5783 const int index = 4*i8x8 + i4x4;
5784 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5786 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5787 //STOP_TIMER("decode_residual")
5791 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5792 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5799 for( c = 0; c < 2; c++ ) {
5800 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5801 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5807 for( c = 0; c < 2; c++ ) {
5808 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5809 for( i = 0; i < 4; i++ ) {
5810 const int index = 16 + 4 * c + i;
5811 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5812 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5816 uint8_t * const nnz= &h->non_zero_count_cache[0];
5817 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5818 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear the whole non-zero-count cache. */
5821 uint8_t * const nnz= &h->non_zero_count_cache[0];
5822 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5823 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5824 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5825 h->last_qscale_diff = 0;
5828 s->current_picture.qscale_table[mb_xy]= s->qscale;
5829 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling applied above. */
5832 h->ref_count[0] >>= 1;
5833 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge. bS<4 uses the normal-filter DSP routine
 * with per-4-sample tc0 thresholds; otherwise (intra, bS==4 — branch lines
 * missing from this excerpt) the strong intra filter is used.
 * The +52 table offsets allow negative clipped qp+offset indices. */
5840 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5842 const int index_a = qp + h->slice_alpha_c0_offset;
5843 const int alpha = (alpha_table+52)[index_a];
5844 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5848 tc[0] = (tc0_table+52)[index_a][bS[0]];
5849 tc[1] = (tc0_table+52)[index_a][bS[1]];
5850 tc[2] = (tc0_table+52)[index_a][bS[2]];
5851 tc[3] = (tc0_table+52)[index_a][bS[3]];
5852 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5854 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge. Same structure as filter_mb_edgev but
 * tc values are biased by +1 as the chroma DSP routines expect. */
5857 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5859 const int index_a = qp + h->slice_alpha_c0_offset;
5860 const int alpha = (alpha_table+52)[index_a];
5861 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5865 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5866 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5867 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5868 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5869 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5871 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar per-row vertical luma deblocking for MBAFF edges, where bS and qp
 * can differ between the two fields so the block-based DSP routines cannot
 * be used.  qp[2] holds one qp per field/half. */
5875 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5877 for( i = 0; i < 16; i++, pix += stride) {
5883 int bS_index = (i >> 1);
5886 bS_index |= (i & 1);
5889 if( bS[bS_index] == 0 ) {
/* qp selection: by field (i>>3) when the current mb is field coded,
 * otherwise alternating per row (i&1). */
5893 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5894 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5895 alpha = (alpha_table+52)[index_a];
5896 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): clip the delta to +-tc. */
5898 if( bS[bS_index] < 4 ) {
5899 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5900 const int p0 = pix[-1];
5901 const int p1 = pix[-2];
5902 const int p2 = pix[-3];
5903 const int q0 = pix[0];
5904 const int q1 = pix[1];
5905 const int q2 = pix[2];
5907 if( FFABS( p0 - q0 ) < alpha &&
5908 FFABS( p1 - p0 ) < beta &&
5909 FFABS( q1 - q0 ) < beta ) {
5913 if( FFABS( p2 - p0 ) < beta ) {
5914 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5917 if( FFABS( q2 - q0 ) < beta ) {
5918 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5922 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5923 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5924 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5925 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4, intra edges). */
5928 const int p0 = pix[-1];
5929 const int p1 = pix[-2];
5930 const int p2 = pix[-3];
5932 const int q0 = pix[0];
5933 const int q1 = pix[1];
5934 const int q2 = pix[2];
5936 if( FFABS( p0 - q0 ) < alpha &&
5937 FFABS( p1 - p0 ) < beta &&
5938 FFABS( q1 - q0 ) < beta ) {
5940 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5941 if( FFABS( p2 - p0 ) < beta)
5943 const int p3 = pix[-4];
5945 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5946 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5947 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5950 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5952 if( FFABS( q2 - q0 ) < beta)
5954 const int q3 = pix[3];
5956 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5957 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5958 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5961 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5965 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5966 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5968 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Scalar per-row vertical chroma deblocking for MBAFF edges (8 chroma rows).
 * Chroma uses only p1/p0/q0/q1 and tc0+1, per the spec's chroma filter. */
5973 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5975 for( i = 0; i < 8; i++, pix += stride) {
5983 if( bS[bS_index] == 0 ) {
5987 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5988 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5989 alpha = (alpha_table+52)[index_a];
5990 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal chroma filter (bS 1..3). */
5992 if( bS[bS_index] < 4 ) {
5993 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5994 const int p0 = pix[-1];
5995 const int p1 = pix[-2];
5996 const int q0 = pix[0];
5997 const int q1 = pix[1];
5999 if( FFABS( p0 - q0 ) < alpha &&
6000 FFABS( p1 - p0 ) < beta &&
6001 FFABS( q1 - q0 ) < beta ) {
6002 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6004 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6005 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6006 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong chroma filter (bS == 4). */
6009 const int p0 = pix[-1];
6010 const int p1 = pix[-2];
6011 const int q0 = pix[0];
6012 const int q1 = pix[1];
6014 if( FFABS( p0 - q0 ) < alpha &&
6015 FFABS( p1 - p0 ) < beta &&
6016 FFABS( q1 - q0 ) < beta ) {
6018 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6019 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6020 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; vertical-filter counterpart of
 * filter_mb_edgev, dispatching to the v_loop_filter DSP routines. */
6026 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6028 const int index_a = qp + h->slice_alpha_c0_offset;
6029 const int alpha = (alpha_table+52)[index_a];
6030 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6031 const int pix_next = stride;
6035 tc[0] = (tc0_table+52)[index_a][bS[0]];
6036 tc[1] = (tc0_table+52)[index_a][bS[1]];
6037 tc[2] = (tc0_table+52)[index_a][bS[2]];
6038 tc[3] = (tc0_table+52)[index_a][bS[3]];
6039 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6041 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; tc biased by +1 as the chroma DSP
 * routines expect (see filter_mb_edgecv). */
6045 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6047 const int index_a = qp + h->slice_alpha_c0_offset;
6048 const int alpha = (alpha_table+52)[index_a];
6049 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6053 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6054 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6055 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6056 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6057 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6059 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path whole-macroblock deblocking: precomputes averaged edge qps and
 * boundary strengths for common cases, falling back to the generic
 * filter_mb() when preconditions don't hold.
 * NOTE(review): the end of this function is not visible in this excerpt. */
6063 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6064 MpegEncContext * const s = &h->s;
6065 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6067 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Fall back to filter_mb() on picture borders, missing DSP support,
 * per-plane chroma qp offsets, or slice-boundary-disabled filtering. */
6071 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6072 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6073 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6074 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6075 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6078 assert(!FRAME_MBAFF);
/* Edge qps are the rounded average of the two adjacent macroblocks' qps. */
6080 mb_type = s->current_picture.mb_type[mb_xy];
6081 qp = s->current_picture.qscale_table[mb_xy];
6082 qp0 = s->current_picture.qscale_table[mb_xy-1];
6083 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6084 qpc = get_chroma_qp( h, 0, qp );
6085 qpc0 = get_chroma_qp( h, 0, qp0 );
6086 qpc1 = get_chroma_qp( h, 0, qp1 );
6087 qp0 = (qp + qp0 + 1) >> 1;
6088 qp1 = (qp + qp1 + 1) >> 1;
6089 qpc0 = (qpc + qpc0 + 1) >> 1;
6090 qpc1 = (qpc + qpc1 + 1) >> 1;
/* All qps below threshold => alpha/beta are zero, nothing to filter. */
6091 qp_thresh = 15 - h->slice_alpha_c0_offset;
6092 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6093 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra mb: fixed bS (4 on mb edges, 3 inside; 3 on horizontal mb edge in
 * field pictures), filter all edges directly. */
6096 if( IS_INTRA(mb_type) ) {
6097 int16_t bS4[4] = {4,4,4,4};
6098 int16_t bS3[4] = {3,3,3,3};
6099 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6100 if( IS_8x8DCT(mb_type) ) {
6101 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6102 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6103 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6104 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6106 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6107 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6108 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6109 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6110 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6111 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6112 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6113 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6115 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6116 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6117 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6118 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6119 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6120 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6121 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6122 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter mb: compute per-edge bS via the DSP strength routine, viewing the
 * int16 bS arrays as uint64 for whole-edge writes/tests. */
6125 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6126 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6128 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6130 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6132 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6133 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6134 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6135 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6137 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6138 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6139 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6140 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6142 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6143 bSv[0][0] = 0x0004000400040004ULL;
6144 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6145 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6147 #define FILTER(hv,dir,edge)\
6148 if(bSv[dir][edge]) {\
6149 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6151 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6152 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6158 } else if( IS_8x8DCT(mb_type) ) {
6178 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6179 MpegEncContext * const s = &h->s;
6181 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6182 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6183 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6184 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6185 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6187 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6188 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6189 // how often to recheck mv-based bS when iterating between edges
6190 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6191 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6192 // how often to recheck mv-based bS when iterating along each edge
6193 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6195 if (first_vertical_edge_done) {
6199 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6202 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6203 && !IS_INTERLACED(mb_type)
6204 && IS_INTERLACED(mbm_type)
6206 // This is a special case in the norm where the filtering must
6207 // be done twice (one each of the field) even if we are in a
6208 // frame macroblock.
6210 static const int nnz_idx[4] = {4,5,6,3};
6211 unsigned int tmp_linesize = 2 * linesize;
6212 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6213 int mbn_xy = mb_xy - 2 * s->mb_stride;
6218 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6219 if( IS_INTRA(mb_type) ||
6220 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6221 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6223 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6224 for( i = 0; i < 4; i++ ) {
6225 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6226 mbn_nnz[nnz_idx[i]] != 0 )
6232 // Do not use s->qscale as luma quantizer because it has not the same
6233 // value in IPCM macroblocks.
6234 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6235 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6236 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6237 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6238 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6239 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6240 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6241 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6248 for( edge = start; edge < edges; edge++ ) {
6249 /* mbn_xy: neighbor macroblock */
6250 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6251 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6252 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6256 if( (edge&1) && IS_8x8DCT(mb_type) )
6259 if( IS_INTRA(mb_type) ||
6260 IS_INTRA(mbn_type) ) {
6263 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6264 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6273 bS[0] = bS[1] = bS[2] = bS[3] = value;
6278 if( edge & mask_edge ) {
6279 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6282 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6283 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6286 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6287 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6288 int bn_idx= b_idx - (dir ? 8:1);
6291 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6292 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6293 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6294 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6297 if(h->slice_type_nos == FF_B_TYPE && v){
6299 for( l = 0; !v && l < 2; l++ ) {
6301 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6302 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6303 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6307 bS[0] = bS[1] = bS[2] = bS[3] = v;
6313 for( i = 0; i < 4; i++ ) {
6314 int x = dir == 0 ? edge : i;
6315 int y = dir == 0 ? i : edge;
6316 int b_idx= 8 + 4 + x + 8*y;
6317 int bn_idx= b_idx - (dir ? 8:1);
6319 if( h->non_zero_count_cache[b_idx] |
6320 h->non_zero_count_cache[bn_idx] ) {
6326 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6327 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6328 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6329 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6335 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6337 for( l = 0; l < 2; l++ ) {
6339 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6340 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6341 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6350 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6355 // Do not use s->qscale as luma quantizer because it has not the same
6356 // value in IPCM macroblocks.
6357 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6358 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6359 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6360 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6362 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6363 if( (edge&1) == 0 ) {
6364 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6365 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6366 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6367 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6370 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6371 if( (edge&1) == 0 ) {
6372 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6373 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6374 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6375 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Apply the in-loop deblocking filter to one decoded macroblock
 * (luma plane plus both chroma planes).
 *
 * Handles the fast-skip for low QP, the CAVLC+8x8DCT non-zero-count
 * fixup, the special MBAFF first-vertical-edge case (8 bS values and
 * per-field QPs), and finally delegates the regular edges to
 * filter_mb_dir() for both directions.
 *
 * NOTE(review): several interior lines appear to be elided in this
 * chunk (original line numbering jumps); comments below only describe
 * the visible statements.
 */
6381 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6382 MpegEncContext * const s = &h->s;
6383 const int mb_xy= mb_x + mb_y*s->mb_stride;
6384 const int mb_type = s->current_picture.mb_type[mb_xy];
// interlaced MBs use a tighter vertical MV threshold (2 instead of 4 quarter-pels)
6385 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6386 int first_vertical_edge_done = 0;
6389 //for sufficiently low qp, filtering wouldn't do anything
6390 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6392 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6393 int qp = s->current_picture.qscale_table[mb_xy];
// skip-filtering test also requires the averaged QP with the left/top neighbour
// to stay below the threshold (edge QP is the rounded mean of both MBs)
6395 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6396 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6401 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6402 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6403 int top_type, left_type[2];
6404 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6405 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6406 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// rebuild the NNZ cache entries of 8x8DCT neighbours from their coded-block-pattern bits
6408 if(IS_8x8DCT(top_type)){
6409 h->non_zero_count_cache[4+8*0]=
6410 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6411 h->non_zero_count_cache[6+8*0]=
6412 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6414 if(IS_8x8DCT(left_type[0])){
6415 h->non_zero_count_cache[3+8*1]=
6416 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6418 if(IS_8x8DCT(left_type[1])){
6419 h->non_zero_count_cache[3+8*3]=
6420 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// for the current MB, each 8x8 block's four 4x4 NNZ cache entries mirror one cbp bit
6423 if(IS_8x8DCT(mb_type)){
6424 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6425 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6427 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6428 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6430 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6431 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6433 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6434 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6439 // left mb is in picture
6440 && h->slice_table[mb_xy-1] != 0xFFFF
6441 // and current and left pair do not have the same interlaced type
6442 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6443 // and left mb is in the same slice if deblocking_filter == 2
6444 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6445 /* First vertical edge is different in MBAFF frames
6446 * There are 8 different bS to compute and 2 different Qp
6448 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6449 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6454 int mb_qp, mbn0_qp, mbn1_qp;
6456 first_vertical_edge_done = 1;
// intra MBs always get the strongest boundary strength on this edge
6458 if( IS_INTRA(mb_type) )
6459 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6461 for( i = 0; i < 8; i++ ) {
6462 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6464 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6466 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6467 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6468 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6470 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// edge QPs: rounded average of current-MB QP and each left-pair neighbour QP,
// computed separately for luma (qp), Cb (bqp) and Cr (rqp)
6477 mb_qp = s->current_picture.qscale_table[mb_xy];
6478 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6479 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6480 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6481 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6482 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6483 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6484 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6485 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6486 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6487 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6488 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6489 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6492 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6493 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6494 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6495 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6496 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// remaining edges: dir 0 = vertical, dir 1 = horizontal; the vertical pass
// skips the first edge when it was already filtered by the MBAFF case above
6500 for( dir = 0; dir < 2; dir++ )
6501 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6503 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6504 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode one slice: the per-macroblock main loop of the decoder.
 *
 * Dispatches to the CABAC or CAVLC entropy path depending on
 * pps.cabac; each path decodes MBs (decode_mb_cabac/decode_mb_cavlc),
 * renders them (hl_decode_mb), advances mb_x/mb_y, reports decoded
 * regions to the error concealer (ff_er_add_slice) and draws finished
 * rows (ff_draw_horiz_band).
 *
 * @param avctx codec context (AVCodecContext)
 * @param arg   pointer to the per-thread H264Context pointer
 * @return 0 on normal slice end, -1 on decode error
 *
 * Fix: line 6670 contained mis-encoded tokens ("s->?gb" / "s->gb?.")
 * that cannot compile; repaired to match the neighbouring lines
 * (6658/6671), which spell the same condition without the stray '?'.
 */
6508 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6509 H264Context *h = *(void**)arg;
6510 MpegEncContext * const s = &h->s;
// in partitioned streams only AC errors/ends are meaningful for this partition
6511 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6515 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6516 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6518 if( h->pps.cabac ) {
// CABAC data starts byte-aligned after the slice header
6522 align_get_bits( &s->gb );
6525 ff_init_cabac_states( &h->cabac);
6526 ff_init_cabac_decoder( &h->cabac,
6527 s->gb.buffer + get_bits_count(&s->gb)/8,
6528 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6529 /* calculate pre-state */
// initialize all 460 CABAC context states from the init tables and slice QP
6530 for( i= 0; i < 460; i++ ) {
6532 if( h->slice_type_nos == FF_I_TYPE )
6533 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6535 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6538 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6540 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6545 int ret = decode_mb_cabac(h);
6547 //STOP_TIMER("decode_mb_cabac")
6549 if(ret>=0) hl_decode_mb(h);
// MBAFF: decode the bottom MB of the pair as well
6551 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6554 if(ret>=0) ret = decode_mb_cabac(h);
6556 if(ret>=0) hl_decode_mb(h);
6559 eos = get_cabac_terminate( &h->cabac );
// bytestream overrun beyond 2 bytes means the CABAC decoder went off the rails
6561 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6562 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6563 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6567 if( ++s->mb_x >= s->mb_width ) {
6569 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6571 if(FIELD_OR_MBAFF_PICTURE) {
6576 if( eos || s->mb_y >= s->mb_height ) {
6577 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6578 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC path
6585 int ret = decode_mb_cavlc(h);
6587 if(ret>=0) hl_decode_mb(h);
6589 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6591 ret = decode_mb_cavlc(h);
6593 if(ret>=0) hl_decode_mb(h);
6598 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6599 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6604 if(++s->mb_x >= s->mb_width){
6606 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6608 if(FIELD_OR_MBAFF_PICTURE) {
6611 if(s->mb_y >= s->mb_height){
6612 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// exact end-of-bitstream means clean slice end; anything else is an error
6614 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6615 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6619 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6626 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6627 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6628 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6629 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6633 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// NOTE(review): the block below looks like an alternate/legacy decode loop;
// it calls get_bits_count(s->gb) without '&' unlike the code above — likely
// dead (#if 0'd in the original) — confirm against the full file.
6642 for(;s->mb_y < s->mb_height; s->mb_y++){
6643 for(;s->mb_x < s->mb_width; s->mb_x++){
6644 int ret= decode_mb(h);
6649 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6650 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6655 if(++s->mb_x >= s->mb_width){
6657 if(++s->mb_y >= s->mb_height){
6658 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6659 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6663 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6670 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6671 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6672 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6676 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6683 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6686 return -1; //not reached
/**
 * Parse a picture-timing SEI message from the bitstream.
 *
 * Skips the CPB/DPB delay fields when HRD parameters are present, reads
 * sei_pic_struct, then skips the optional clock timestamps (full or
 * incremental form) for each clock-ts slot of the pic_struct.
 *
 * @return presumably 0 on success, negative on invalid pic_struct —
 *         the return statements are on lines elided from this chunk.
 */
6689 static int decode_picture_timing(H264Context *h){
6690 MpegEncContext * const s = &h->s;
6691 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6692 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6693 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6695 if(h->sps.pic_struct_present_flag){
6696 unsigned int i, num_clock_ts;
6697 h->sei_pic_struct = get_bits(&s->gb, 4);
6699 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
// number of clock timestamps is dictated by pic_struct (1..3)
6702 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6704 for (i = 0 ; i < num_clock_ts ; i++){
6705 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6706 unsigned int full_timestamp_flag;
6707 skip_bits(&s->gb, 2); /* ct_type */
6708 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6709 skip_bits(&s->gb, 5); /* counting_type */
6710 full_timestamp_flag = get_bits(&s->gb, 1);
6711 skip_bits(&s->gb, 1); /* discontinuity_flag */
6712 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6713 skip_bits(&s->gb, 8); /* n_frames */
// full timestamp: fixed-width seconds/minutes/hours
6714 if(full_timestamp_flag){
6715 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6716 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6717 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// incremental timestamp: each component is guarded by its own flag
6719 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6720 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6721 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6722 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6723 if(get_bits(&s->gb, 1)) /* hours_flag */
6724 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6728 if(h->sps.time_offset_length > 0)
6729 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an "unregistered user data" SEI payload.
 *
 * Copies up to sizeof(user_data)-1 bytes (16-byte UUID + text) into a
 * local buffer and scans the text portion for an x264 version banner,
 * recording the build number in h->x264_build for bug workarounds.
 * Remaining payload bytes beyond the buffer are skipped.
 *
 * @param size payload size in bytes
 */
6736 static int decode_unregistered_user_data(H264Context *h, int size){
6737 MpegEncContext * const s = &h->s;
6738 uint8_t user_data[16+256];
6744 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6745 user_data[i]= get_bits(&s->gb, 8);
// text starts after the 16-byte UUID; only the build number is extracted
6749 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6750 if(e==1 && build>=0)
6751 h->x264_build= build;
6753 if(s->avctx->debug & FF_DEBUG_BUGS)
6754 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// skip any payload bytes that did not fit into the local buffer
6757 skip_bits(&s->gb, 8);
/**
 * Parse all SEI messages in the current NAL unit.
 *
 * Each message is a (type, size) pair where both fields are coded as a
 * run of 0xFF bytes plus a terminator byte; recognized payload types
 * are dispatched, everything else is skipped by size.
 */
6762 static int decode_sei(H264Context *h){
6763 MpegEncContext * const s = &h->s;
6765 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// ff_byte accumulation: each 0xFF byte adds 255 to the value
6770 type+= show_bits(&s->gb, 8);
6771 }while(get_bits(&s->gb, 8) == 255);
6775 size+= show_bits(&s->gb, 8);
6776 }while(get_bits(&s->gb, 8) == 255);
6779 case 1: // Picture timing SEI
6780 if(decode_picture_timing(h) < 0)
// case for unregistered user data (x264 banner detection)
6784 if(decode_unregistered_user_data(h, size) < 0)
// unknown payload type: skip it wholesale
6788 skip_bits(&s->gb, 8*size);
6791 //FIXME check bits here
6792 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters from the VUI.
 *
 * Most fields are read and discarded; only the delay-field lengths and
 * time_offset_length are stored in the SPS, as those are needed later
 * to parse picture-timing SEI messages.
 *
 * @return negative on invalid cpb_count (success path elided here)
 */
6798 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6799 MpegEncContext * const s = &h->s;
6801 cpb_count = get_ue_golomb(&s->gb) + 1;
// spec limit: cpb_cnt_minus1 is 0..31
6803 if(cpb_count > 32U){
6804 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6808 get_bits(&s->gb, 4); /* bit_rate_scale */
6809 get_bits(&s->gb, 4); /* cpb_size_scale */
6810 for(i=0; i<cpb_count; i++){
6811 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6812 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6813 get_bits1(&s->gb); /* cbr_flag */
6815 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
// these three lengths are retained for later SEI parsing
6816 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6817 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6818 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse the VUI (video usability information) section of an SPS.
 *
 * Stores sample aspect ratio, timing info, HRD presence flags,
 * pic_struct_present_flag and bitstream restrictions into *sps;
 * purely informative fields (overscan, colour description, chroma
 * sample location) are read and discarded.
 */
6822 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6823 MpegEncContext * const s = &h->s;
6824 int aspect_ratio_info_present_flag;
6825 unsigned int aspect_ratio_idc;
6827 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6829 if( aspect_ratio_info_present_flag ) {
6830 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit num/den pair; other idc values index a table
6831 if( aspect_ratio_idc == EXTENDED_SAR ) {
6832 sps->sar.num= get_bits(&s->gb, 16);
6833 sps->sar.den= get_bits(&s->gb, 16);
6834 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6835 sps->sar= pixel_aspect[aspect_ratio_idc];
6837 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6844 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6846 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6847 get_bits1(&s->gb); /* overscan_appropriate_flag */
6850 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6851 get_bits(&s->gb, 3); /* video_format */
6852 get_bits1(&s->gb); /* video_full_range_flag */
6853 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6854 get_bits(&s->gb, 8); /* colour_primaries */
6855 get_bits(&s->gb, 8); /* transfer_characteristics */
6856 get_bits(&s->gb, 8); /* matrix_coefficients */
6860 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6861 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6862 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6865 sps->timing_info_present_flag = get_bits1(&s->gb);
6866 if(sps->timing_info_present_flag){
6867 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6868 sps->time_scale = get_bits_long(&s->gb, 32);
6869 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// HRD parameters may appear twice (NAL and VCL variants)
6872 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6873 if(sps->nal_hrd_parameters_present_flag)
6874 if(decode_hrd_parameters(h, sps) < 0)
6876 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6877 if(sps->vcl_hrd_parameters_present_flag)
6878 if(decode_hrd_parameters(h, sps) < 0)
6880 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6881 get_bits1(&s->gb); /* low_delay_hrd_flag */
6882 sps->pic_struct_present_flag = get_bits1(&s->gb);
6884 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6885 if(sps->bitstream_restriction_flag){
6886 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6887 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6888 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6889 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6890 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// num_reorder_frames drives output-delay handling; max_dec_frame_buffering discarded
6891 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6892 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6894 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6895 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Decode one quantization scaling list (4x4 => 16 or 8x8 => 64 entries).
 *
 * Three outcomes per the spec: list absent -> copy fallback_list
 * (prediction); first delta makes next == 0 -> copy the JVT default
 * list; otherwise decode delta-coded values in zigzag order, repeating
 * the last value once the deltas stop.
 *
 * @param factors       output table, written in raster order via scan[]
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec-default list used when "use default" is signalled
 * @param fallback_list prediction source when the list is not transmitted
 */
6903 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6904 const uint8_t *jvt_list, const uint8_t *fallback_list){
6905 MpegEncContext * const s = &h->s;
6906 int i, last = 8, next = 8;
6907 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6908 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6909 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6911 for(i=0;i<size;i++){
// deltas are signed Exp-Golomb, accumulated modulo 256
6913 next = (last + get_se_golomb(&s->gb)) & 0xff;
6914 if(!i && !next){ /* matrix not written, we use the preset one */
6915 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// a zero 'next' means "repeat last value" for the remaining entries
6918 last = factors[scan[i]] = next ? next : last;
/**
 * Decode the full set of scaling matrices for an SPS or PPS.
 *
 * PPS lists fall back to the SPS lists when the SPS transmitted any
 * (fallback_sps), otherwise to the flat defaults. Within one set, each
 * list falls back to the previously decoded list of the same class
 * (intra/inter), per the spec's fallback rule A/B.
 *
 * @param is_sps non-zero when decoding the SPS-level matrices
 */
6922 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6923 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6924 MpegEncContext * const s = &h->s;
6925 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6926 const uint8_t *fallback[4] = {
6927 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6928 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6929 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6930 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// scaling_matrix_present_flag (seq/pic level)
6932 if(get_bits1(&s->gb)){
6933 sps->scaling_matrix_present |= is_sps;
6934 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6935 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6936 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6937 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6938 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6939 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only at SPS level or when the PPS enables 8x8 transform
6940 if(is_sps || pps->transform_8x8_mode){
6941 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6942 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set (SPS) NAL unit.
 *
 * Allocates a new SPS, fills it from the bitstream (profile/level,
 * chroma format, POC parameters, reference frame count, picture
 * dimensions, interlacing, cropping, optional VUI), validates ranges,
 * and installs it into h->sps_buffers[sps_id], freeing any previous
 * SPS with the same id.
 *
 * NOTE(review): the error-return statements following the av_log()
 * calls are on lines elided from this chunk.
 */
6947 static inline int decode_seq_parameter_set(H264Context *h){
6948 MpegEncContext * const s = &h->s;
6949 int profile_idc, level_idc;
6950 unsigned int sps_id;
6954 profile_idc= get_bits(&s->gb, 8);
6955 get_bits1(&s->gb); //constraint_set0_flag
6956 get_bits1(&s->gb); //constraint_set1_flag
6957 get_bits1(&s->gb); //constraint_set2_flag
6958 get_bits1(&s->gb); //constraint_set3_flag
6959 get_bits(&s->gb, 4); // reserved
6960 level_idc= get_bits(&s->gb, 8);
6961 sps_id= get_ue_golomb(&s->gb);
6963 if(sps_id >= MAX_SPS_COUNT) {
6964 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6967 sps= av_mallocz(sizeof(SPS));
6971 sps->profile_idc= profile_idc;
6972 sps->level_idc= level_idc;
// default all scaling factors to flat 16 until (possibly) overridden below
6974 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6975 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6976 sps->scaling_matrix_present = 0;
6978 if(sps->profile_idc >= 100){ //high profile
6979 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6980 if(sps->chroma_format_idc == 3)
6981 get_bits1(&s->gb); //residual_color_transform_flag
6982 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6983 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6984 sps->transform_bypass = get_bits1(&s->gb);
6985 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// non-high profiles: 4:2:0 is implicit
6987 sps->chroma_format_idc= 1;
6990 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6991 sps->poc_type= get_ue_golomb(&s->gb);
6993 if(sps->poc_type == 0){ //FIXME #define
6994 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6995 } else if(sps->poc_type == 1){//FIXME #define
6996 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6997 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6998 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6999 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7001 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7002 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7006 for(i=0; i<sps->poc_cycle_length; i++)
7007 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7008 }else if(sps->poc_type != 2){
7009 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7013 sps->ref_frame_count= get_ue_golomb(&s->gb);
7014 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7015 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7018 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7019 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7020 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// guard against 16*mb_{width,height} overflowing int, plus generic dimension check
7021 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7022 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7023 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7027 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7028 if(!sps->frame_mbs_only_flag)
7029 sps->mb_aff= get_bits1(&s->gb);
7033 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7035 #ifndef ALLOW_INTERLACE
7037 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7039 sps->crop= get_bits1(&s->gb);
7041 sps->crop_left = get_ue_golomb(&s->gb);
7042 sps->crop_right = get_ue_golomb(&s->gb);
7043 sps->crop_top = get_ue_golomb(&s->gb);
7044 sps->crop_bottom= get_ue_golomb(&s->gb);
7045 if(sps->crop_left || sps->crop_top){
7046 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7048 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7049 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7055 sps->crop_bottom= 0;
7058 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7059 if( sps->vui_parameters_present_flag )
7060 decode_vui_parameters(h, sps);
7062 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7063 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7064 sps_id, sps->profile_idc, sps->level_idc,
7066 sps->ref_frame_count,
7067 sps->mb_width, sps->mb_height,
7068 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7069 sps->direct_8x8_inference_flag ? "8B8" : "",
7070 sps->crop_left, sps->crop_right,
7071 sps->crop_top, sps->crop_bottom,
7072 sps->vui_parameters_present_flag ? "VUI" : "",
7073 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// replace any previously stored SPS with this id
7076 av_free(h->sps_buffers[sps_id]);
7077 h->sps_buffers[sps_id]= sps;
/**
 * Precompute the luma-QP -> chroma-QP lookup table for one chroma
 * component of a PPS, applying the component's chroma_qp_index_offset
 * and clipping the offset index to the valid QP range [0,51].
 *
 * @param t     chroma component: 0 = Cb table, 1 = Cr table
 * @param index chroma_qp_index_offset for that component
 */
7085 build_qp_table(PPS *pps, int t, int index)
7088 for(i = 0; i < 52; i++)
7089 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set (PPS) NAL unit.
 *
 * Allocates a new PPS, fills it from the bitstream (entropy mode,
 * slice groups, reference counts, QP offsets, deblocking/intra-pred
 * flags, optional 8x8-transform extension with scaling matrices),
 * derives the chroma QP tables, and installs it into
 * h->pps_buffers[pps_id], freeing any previous PPS with the same id.
 *
 * @param bit_length size of the RBSP in bits, used to detect the
 *                   optional trailing (high-profile) fields
 */
7092 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7093 MpegEncContext * const s = &h->s;
7094 unsigned int pps_id= get_ue_golomb(&s->gb);
7097 if(pps_id >= MAX_PPS_COUNT) {
7098 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7102 pps= av_mallocz(sizeof(PPS));
// the referenced SPS must already have been decoded
7105 pps->sps_id= get_ue_golomb(&s->gb);
7106 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7107 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7111 pps->cabac= get_bits1(&s->gb);
7112 pps->pic_order_present= get_bits1(&s->gb);
7113 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// FMO (multiple slice groups) is parsed for the map type but not supported;
// the block comment below is the relevant spec syntax, kept for reference
7114 if(pps->slice_group_count > 1 ){
7115 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7116 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7117 switch(pps->mb_slice_group_map_type){
7120 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7121 | run_length[ i ] |1 |ue(v) |
7126 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7128 | top_left_mb[ i ] |1 |ue(v) |
7129 | bottom_right_mb[ i ] |1 |ue(v) |
7137 | slice_group_change_direction_flag |1 |u(1) |
7138 | slice_group_change_rate_minus1 |1 |ue(v) |
7143 | slice_group_id_cnt_minus1 |1 |ue(v) |
7144 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7146 | slice_group_id[ i ] |1 |u(v) |
7151 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7152 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7153 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7154 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7158 pps->weighted_pred= get_bits1(&s->gb);
7159 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7160 pps->init_qp= get_se_golomb(&s->gb) + 26;
7161 pps->init_qs= get_se_golomb(&s->gb) + 26;
7162 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7163 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7164 pps->constrained_intra_pred= get_bits1(&s->gb);
7165 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7167 pps->transform_8x8_mode= 0;
7168 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// inherit the SPS scaling matrices; the PPS may override them below
7169 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7170 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// trailing high-profile fields are only present if bits remain in the RBSP
7172 if(get_bits_count(&s->gb) < bit_length){
7173 pps->transform_8x8_mode= get_bits1(&s->gb);
7174 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7175 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// without the extension, Cr uses the same offset as Cb
7177 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7180 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7181 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7182 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7183 h->pps.chroma_qp_diff= 1;
7185 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7186 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7187 pps_id, pps->sps_id,
7188 pps->cabac ? "CABAC" : "CAVLC",
7189 pps->slice_group_count,
7190 pps->ref_count[0], pps->ref_count[1],
7191 pps->weighted_pred ? "weighted" : "",
7192 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7193 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7194 pps->constrained_intra_pred ? "CONSTR" : "",
7195 pps->redundant_pic_cnt_present ? "REDU" : "",
7196 pps->transform_8x8_mode ? "8x8DCT" : ""
// replace any previously stored PPS with this id
7200 av_free(h->pps_buffers[pps_id]);
7201 h->pps_buffers[pps_id]= pps;
7209 * Call decode_slice() for each context.
7211 * @param h h264 master context
7212 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for every queued slice context.
 *
 * Single-context case calls decode_slice() directly; otherwise the
 * per-thread contexts are primed and dispatched through
 * avctx->execute(), after which position/state from the last context
 * and the per-thread error counts are folded back into the master.
 */
7214 static void execute_decode_slices(H264Context *h, int context_count){
7215 MpegEncContext * const s = &h->s;
7216 AVCodecContext * const avctx= s->avctx;
7220 if(context_count == 1) {
7221 decode_slice(avctx, &h);
// propagate error-handling settings into each worker context before dispatch
7223 for(i = 1; i < context_count; i++) {
7224 hx = h->thread_context[i];
7225 hx->s.error_recognition = avctx->error_recognition;
7226 hx->s.error_count = 0;
7229 avctx->execute(avctx, (void *)decode_slice,
7230 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7232 /* pull back stuff from slices to master context */
7233 hx = h->thread_context[context_count - 1];
7234 s->mb_x = hx->s.mb_x;
7235 s->mb_y = hx->s.mb_y;
7236 s->dropable = hx->s.dropable;
7237 s->picture_structure = hx->s.picture_structure;
// error counts are accumulated across all worker contexts
7238 for(i = 1; i < context_count; i++)
7239 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units (AVC length-prefixed or Annex-B
 * start-code framed), unescape each with decode_nal(), and dispatch on
 * nal_unit_type (slices, partitions, SEI, SPS, PPS, ...). Slices are queued
 * into thread contexts and flushed through execute_decode_slices(). */
7244 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7245 MpegEncContext * const s = &h->s;
7246 AVCodecContext * const avctx= s->avctx;
7248 H264Context *hx; ///< thread context
7249 int context_count = 0;
7251 h->max_contexts = avctx->thread_count;
// Debug hex dump of the first input bytes (elided guard presumably under a
// debug condition — confirm against full source).
7254 for(i=0; i<50; i++){
7255 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Without CHUNKS the buffer holds a whole access unit: reset slice state.
7258 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7259 h->current_slice = 0;
7260 if (!s->first_field)
7261 s->current_picture_ptr= NULL;
7273 if(buf_index >= buf_size) break;
// AVC framing: read nal_length_size big-endian length bytes.
7275 for(i = 0; i < h->nal_length_size; i++)
7276 nalsize = (nalsize << 8) | buf[buf_index++];
7277 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7282 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7287 // start code prefix search
7288 for(; buf_index + 3 < buf_size; buf_index++){
7289 // This should always succeed in the first iteration.
7290 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7294 if(buf_index+3 >= buf_size) break;
7299 hx = h->thread_context[context_count];
7301 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7302 if (ptr==NULL || dst_length < 0){
// NOTE(review): the array access is evaluated before the length guard, so
// dst_length == 0 reads ptr[-1] out of bounds; the conditions should be
// swapped to `dst_length > 0 && ptr[dst_length - 1] == 0` — confirm and fix
// in a dedicated change.
7305 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7307 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7309 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7310 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7313 if (h->is_avc && (nalsize != consumed)){
7314 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7318 buf_index += consumed;
// Skip non-reference NALs when the caller asked for it (hurry_up/skip_frame).
7320 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7321 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7326 switch(hx->nal_unit_type){
// IDR slice: all preceding slices must also be IDR; reset reference state.
7328 if (h->nal_unit_type != NAL_IDR_SLICE) {
7329 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7332 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular (non-partitioned) slice: whole RBSP goes through one bitreader.
7334 init_get_bits(&hx->s.gb, ptr, bit_length);
7336 hx->inter_gb_ptr= &hx->s.gb;
7337 hx->s.data_partitioning = 0;
7339 if((err = decode_slice_header(hx, h)))
7342 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Decode the slice only if it survives redundancy/skip-frame filtering.
7343 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7344 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7345 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7346 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7347 && avctx->skip_frame < AVDISCARD_ALL)
// Data partition A: carries the slice header; B/C follow in separate NALs.
7351 init_get_bits(&hx->s.gb, ptr, bit_length);
7353 hx->inter_gb_ptr= NULL;
7354 hx->s.data_partitioning = 1;
7356 err = decode_slice_header(hx, h);
// Data partition B: intra residuals.
7359 init_get_bits(&hx->intra_gb, ptr, bit_length);
7360 hx->intra_gb_ptr= &hx->intra_gb;
// Data partition C: inter residuals; all three partitions must be present
// before the slice can be decoded (checked below).
7363 init_get_bits(&hx->inter_gb, ptr, bit_length);
7364 hx->inter_gb_ptr= &hx->inter_gb;
7366 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7367 && s->context_initialized
7369 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7370 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7371 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7372 && avctx->skip_frame < AVDISCARD_ALL)
// SEI (presumably — dispatch labels elided here; verify against full source).
7376 init_get_bits(&s->gb, ptr, bit_length);
// Sequence parameter set: may enable low_delay / has_b_frames adjustments.
7380 init_get_bits(&s->gb, ptr, bit_length);
7381 decode_seq_parameter_set(h);
7383 if(s->flags& CODEC_FLAG_LOW_DELAY)
7386 if(avctx->has_b_frames < 2)
7387 avctx->has_b_frames= !s->low_delay;
// Picture parameter set.
7390 init_get_bits(&s->gb, ptr, bit_length);
7392 decode_picture_parameter_set(h, bit_length);
7396 case NAL_END_SEQUENCE:
7397 case NAL_END_STREAM:
7398 case NAL_FILLER_DATA:
7400 case NAL_AUXILIARY_SLICE:
7403 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush queued slices once all thread contexts are filled.
7406 if(context_count == h->max_contexts) {
7407 execute_decode_slices(h, context_count);
7412 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7414 /* Slice could not be decoded in parallel mode, copy down
7415 * NAL unit stuff to context 0 and restart. Note that
7416 * rbsp_buffer is not transferred, but since we no longer
7417 * run in parallel mode this should not be an issue. */
7418 h->nal_unit_type = hx->nal_unit_type;
7419 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any remaining queued slices before returning.
7425 execute_decode_slices(h, context_count);
7430 * returns the number of bytes consumed for building the current frame
/* Clamp the consumed-byte position for the current frame: never report 0
 * (callers could loop forever) and treat "almost at the end" as fully
 * consumed. Return statement elided in this view — presumably returns pos. */
7432 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7433 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7434 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level libavcodec decode callback: parses extradata (avcC) on first
 * call, decodes the NAL units of one input packet, then handles field
 * pairing, interlacing signalling from SEI pic_struct, and B-frame
 * reordering through the delayed_pic[] queue before emitting a frame. */
7439 static int decode_frame(AVCodecContext *avctx,
7440 void *data, int *data_size,
7441 const uint8_t *buf, int buf_size)
7443 H264Context *h = avctx->priv_data;
7444 MpegEncContext *s = &h->s;
7445 AVFrame *pict = data;
7448 s->flags= avctx->flags;
7449 s->flags2= avctx->flags2;
7451 /* end of stream, output what is still in the buffers */
7452 if (buf_size == 0) {
7456 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC (stop at a key frame /
// zero-POC boundary) and shift the queue down.
7457 out = h->delayed_pic[0];
7459 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7460 if(h->delayed_pic[i]->poc < out->poc){
7461 out = h->delayed_pic[i];
7465 for(i=out_idx; h->delayed_pic[i]; i++)
7466 h->delayed_pic[i] = h->delayed_pic[i+1];
7469 *data_size = sizeof(AVFrame);
7470 *pict= *(AVFrame*)out;
// One-time parse of avcC extradata: SPS/PPS records, then real NAL length.
7476 if(h->is_avc && !h->got_avcC) {
7477 int i, cnt, nalsize;
7478 unsigned char *p = avctx->extradata;
7479 if(avctx->extradata_size < 7) {
7480 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7484 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7487 /* sps and pps in the avcC always have length coded with 2 bytes,
7488 so put a fake nal_length_size = 2 while parsing them */
7489 h->nal_length_size = 2;
7490 // Decode sps from avcC
7491 cnt = *(p+5) & 0x1f; // Number of sps
7493 for (i = 0; i < cnt; i++) {
7494 nalsize = AV_RB16(p) + 2;
7495 if(decode_nal_units(h, p, nalsize) < 0) {
7496 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7501 // Decode pps from avcC
7502 cnt = *(p++); // Number of pps
7503 for (i = 0; i < cnt; i++) {
7504 nalsize = AV_RB16(p) + 2;
7505 if(decode_nal_units(h, p, nalsize) != nalsize) {
7506 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7511 // Now store right nal length size, that will be use to parse all other nals
7512 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7513 // Do not reparse avcC
// Annex-B extradata (non-avcC): decode it once as ordinary NAL units.
7517 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7518 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7523 buf_index=decode_nal_units(h, buf, buf_size);
7527 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7528 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7529 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture is complete (whole access unit, or last MB row in CHUNKS mode).
7533 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7534 Picture *out = s->current_picture_ptr;
7535 Picture *cur = s->current_picture_ptr;
7536 int i, pics, cross_idr, out_of_order, out_idx;
7540 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7541 s->current_picture_ptr->pict_type= s->pict_type;
// Apply memory-management control ops and roll POC/frame_num state forward.
7544 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7545 h->prev_poc_msb= h->poc_msb;
7546 h->prev_poc_lsb= h->poc_lsb;
7548 h->prev_frame_num_offset= h->frame_num_offset;
7549 h->prev_frame_num= h->frame_num;
7552 * FIXME: Error handling code does not seem to support interlaced
7553 * when slices span multiple rows
7554 * The ff_er_add_slice calls don't work right for bottom
7555 * fields; they cause massive erroneous error concealing
7556 * Error marking covers both fields (top and bottom).
7557 * This causes a mismatched s->error_count
7558 * and a bad error table. Further, the error count goes to
7559 * INT_MAX when called for bottom field, because mb_y is
7560 * past end by one (callers fault) and resync_mb_y != 0
7561 * causes problems for the first MB line, too.
// Field picture with its pair not yet decoded: emit nothing this call.
7568 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7569 /* Wait for second field. */
7573 cur->repeat_pict = 0;
7575 /* Signal interlacing information externally. */
7576 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7577 if(h->sps.pic_struct_present_flag){
7578 switch (h->sei_pic_struct)
7580 case SEI_PIC_STRUCT_FRAME:
7581 cur->interlaced_frame = 0;
7583 case SEI_PIC_STRUCT_TOP_FIELD:
7584 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7585 case SEI_PIC_STRUCT_TOP_BOTTOM:
7586 case SEI_PIC_STRUCT_BOTTOM_TOP:
7587 cur->interlaced_frame = 1;
7589 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7590 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7591 // Signal the possibility of telecined film externally (pic_struct 5,6)
7592 // From these hints, let the applications decide if they apply deinterlacing.
7593 cur->repeat_pict = 1;
7594 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7596 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7597 // Force progressive here, as doubling interlaced frame is a bad idea.
7598 cur->interlaced_frame = 0;
7599 cur->repeat_pict = 2;
7601 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7602 cur->interlaced_frame = 0;
7603 cur->repeat_pict = 4;
7607 /* Derive interlacing flag from used decoding process. */
7608 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7611 if (cur->field_poc[0] != cur->field_poc[1]){
7612 /* Derive top_field_first from field pocs. */
7613 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7615 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7616 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7617 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7618 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7619 cur->top_field_first = 1;
7621 cur->top_field_first = 0;
7623 /* Most likely progressive */
7624 cur->top_field_first = 0;
7628 //FIXME do something with unavailable reference frames
7630 /* Sort B-frames into display order */
7632 if(h->sps.bitstream_restriction_flag
7633 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7634 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions, strict compliance forces maximum delay.
7638 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7639 && !h->sps.bitstream_restriction_flag){
7640 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// Append the current picture to the delayed-output queue, keeping it
// referenced (DELAYED_PIC_REF) until it is actually output.
7645 while(h->delayed_pic[pics]) pics++;
7647 assert(pics <= MAX_DELAYED_PIC_COUNT);
7649 h->delayed_pic[pics++] = cur;
7650 if(cur->reference == 0)
7651 cur->reference = DELAYED_PIC_REF;
// Select the lowest-POC picture for output, as in the flush path above.
7653 out = h->delayed_pic[0];
7655 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7656 if(h->delayed_pic[i]->poc < out->poc){
7657 out = h->delayed_pic[i];
7660 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7662 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames adaptively when out-of-order output is detected and the
// stream did not declare its reorder depth.
7664 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7666 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7668 ((!cross_idr && out->poc > h->outputed_poc + 2)
7669 || cur->pict_type == FF_B_TYPE)))
7672 s->avctx->has_b_frames++;
// Output (or drop, if still out of order) once the queue exceeds the delay.
7675 if(out_of_order || pics > s->avctx->has_b_frames){
7676 out->reference &= ~DELAYED_PIC_REF;
7677 for(i=out_idx; h->delayed_pic[i]; i++)
7678 h->delayed_pic[i] = h->delayed_pic[i+1];
7680 if(!out_of_order && pics > s->avctx->has_b_frames){
7681 *data_size = sizeof(AVFrame);
7683 h->outputed_poc = out->poc;
7684 *pict= *(AVFrame*)out;
7686 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7691 assert(pict->data[0] || !*data_size);
7692 ff_print_debug_info(s, pict);
7693 //printf("out %d\n", (int)pict->data[0]);
7696 /* Return the Picture timestamp as the frame number */
7697 /* we subtract 1 because it is added on utils.c */
7698 avctx->frame_number = s->picture_number - 1;
7700 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour counts as available only if it lies inside the
 * picture and belongs to the same slice (slice_table match).
 * Index map: 0=top-left, 1=top, 2=top-right, 3=left, 4/5 fixed.
 * (Guards for the top row appear to be elided in this view — the full
 * source presumably checks s->mb_y before the mb_stride accesses.) */
7703 static inline void fill_mb_avail(H264Context *h){
7704 MpegEncContext * const s = &h->s;
7705 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7708 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7709 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7710 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7716 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7717 h->mb_avail[4]= 1; //FIXME move out
7718 h->mb_avail[5]= 0; //FIXME move out
/* Self-test body (presumably main() under #ifdef TEST — the function header
 * is not visible here). Exercises, in order: unsigned/signed Exp-Golomb
 * round-trips, the 4x4 (I)DCT with a crude quantise/dequantise step, the
 * quantizer, and NAL escaping/unescaping (encode_nal/decode_nal). */
7726 #define SIZE (COUNT*40)
7732 // int int_temp[10000];
7734 AVCodecContext avctx;
7736 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT codes, then read them back ---
7738 init_put_bits(&pb, temp, SIZE);
7739 printf("testing unsigned exp golomb\n");
7740 for(i=0; i<COUNT; i++){
7742 set_ue_golomb(&pb, i);
7743 STOP_TIMER("set_ue_golomb");
7745 flush_put_bits(&pb);
7747 init_get_bits(&gb, temp, 8*SIZE);
7748 for(i=0; i<COUNT; i++){
7751 s= show_bits(&gb, 24);
7754 j= get_ue_golomb(&gb);
7756 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7759 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb: same round-trip, values centred on zero ---
7763 init_put_bits(&pb, temp, SIZE);
7764 printf("testing signed exp golomb\n");
7765 for(i=0; i<COUNT; i++){
7767 set_se_golomb(&pb, i - COUNT/2);
7768 STOP_TIMER("set_se_golomb");
7770 flush_put_bits(&pb);
7772 init_get_bits(&gb, temp, 8*SIZE);
7773 for(i=0; i<COUNT; i++){
7776 s= show_bits(&gb, 24);
7779 j= get_se_golomb(&gb);
7780 if(j != i - COUNT/2){
7781 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7784 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT round-trip on random blocks, accumulating error ---
7788 printf("testing 4x4 (I)DCT\n");
7791 uint8_t src[16], ref[16];
7792 uint64_t error= 0, max_error=0;
7794 for(i=0; i<COUNT; i++){
7796 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7797 for(j=0; j<16; j++){
7798 ref[j]= random()%255;
7799 src[j]= random()%255;
7802 h264_diff_dct_c(block, src, ref, 4);
// Crude dequant scaling before the IDCT (the *4/5 factors approximate the
// forward-transform norm — see the H.264 integer-transform design).
7805 for(j=0; j<16; j++){
7806 // printf("%d ", block[j]);
7807 block[j]= block[j]*4;
7808 if(j&1) block[j]= (block[j]*4 + 2)/5;
7809 if(j&4) block[j]= (block[j]*4 + 2)/5;
7813 s->dsp.h264_idct_add(ref, block, 4);
7814 /* for(j=0; j<16; j++){
7815 printf("%d ", ref[j]);
7819 for(j=0; j<16; j++){
7820 int diff= FFABS(src[j] - ref[j]);
7823 max_error= FFMAX(max_error, diff);
7826 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer sweep over all 52 QP values on random blocks ---
7827 printf("testing quantizer\n");
7828 for(qp=0; qp<52; qp++){
7830 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape a random bitstream, unescape, compare ---
7833 printf("Testing NAL layer\n");
7835 uint8_t bitstream[COUNT];
7836 uint8_t nal[COUNT*2];
7838 memset(&h, 0, sizeof(H264Context));
7840 for(i=0; i<COUNT; i++){
// Fill with non-zero bytes, then sprinkle in zeros so that emulation
// prevention (00 00 0x sequences) is actually exercised.
7848 for(j=0; j<COUNT; j++){
7849 bitstream[j]= (random() % 255) + 1;
7852 for(j=0; j<zeros; j++){
7853 int pos= random() % COUNT;
7854 while(bitstream[pos] == 0){
7863 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7865 printf("encoding failed\n");
7869 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7873 if(out_length != COUNT){
7874 printf("incorrect length %d %d\n", out_length, COUNT);
7878 if(consumed != nal_length){
7879 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7883 if(memcmp(bitstream, out, COUNT)){
7884 printf("mismatch\n");
7890 printf("Testing RBSP\n");
/* Codec close callback: release per-context allocations — the two NAL
 * unescape buffers, the per-picture tables, and every stored SPS/PPS.
 * av_freep() nulls each pointer, guarding against double free. */
7898 static av_cold int decode_end(AVCodecContext *avctx)
7900 H264Context *h = avctx->priv_data;
7901 MpegEncContext *s = &h->s;
7904 av_freep(&h->rbsp_buffer[0]);
7905 av_freep(&h->rbsp_buffer[1]);
7906 free_tables(h); //FIXME cleanup init stuff perhaps
7908 for(i = 0; i < MAX_SPS_COUNT; i++)
7909 av_freep(h->sps_buffers + i);
7911 for(i = 0; i < MAX_PPS_COUNT; i++)
7912 av_freep(h->pps_buffers + i);
7916 // memset(h, 0, sizeof(H264Context));
/* Public AVCodec registration for the H.264 decoder. Most fields (name,
 * type, codec id, init/close/decode callbacks, flush) are elided in this
 * view; visible here are the private-context size, the capability flags,
 * and the descriptive long name. */
7922 AVCodec h264_decoder = {
7926 sizeof(H264Context),
// DR1: supports direct rendering; DELAY: may buffer frames for reordering.
7931 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7933 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),