2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/*
 * NOTE(review): the interior line numbers embedded in this excerpt jump
 * (e.g. 69 -> 72), so some original source lines appear to be missing.
 * Code below is kept byte-identical; only comments were added.
 */
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/*
 * CAVLC decode tables: each VLC object is paired with a static backing
 * array (VLC_TYPE [...][2]) and a *_size constant that mirrors the array's
 * first dimension.  Presumably these are initialized at runtime from the
 * bitstream tables -- the init code is not visible in this excerpt.
 */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for functions defined later in the file
 * (their definitions are not visible in this excerpt). */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* qp % 6 lookup for qp in [0,51] (avoids a runtime modulo).
 * (The excerpt was missing the closing brace of the initializer; restored.) */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp / 6 lookup for qp in [0,51] (avoids a runtime division).
 * (The excerpt was missing the closing brace of the initializer; restored.) */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
97 static const int left_block_options[4][8]={
/*
 * Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero-count cache, cbp, mv/ref/mvd caches, B-direct cache) from the
 * frame-wide tables, for either decoding or the deblocking filter
 * (for_deblock != 0).
 *
 * NOTE(review): the interior line numbers embedded in this excerpt jump
 * (e.g. 110 -> 113, 140 -> 142), so many original source lines -- closing
 * braces, loop headers, conditions -- are missing here.  The code below is
 * kept byte-identical to the excerpt; only comments were added.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addressing: raster-order neighbours. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF neighbour fixup: neighbour MB indices depend on whether the
 * current pair and each neighbour pair are coded frame or field. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
/* Record resolved neighbour MB indices for later use. */
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblock path: a neighbour counts as available if its slice_table entry
 * is below the 0xFFFF "unused" sentinel (cross-slice filtering allowed). */
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: a neighbour is available only within the same slice. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: compute sample-availability bitmasks; with
 * constrained_intra_pred only intra neighbours count as available. */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: seed the prediction-mode cache from the top/left neighbours. */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: import neighbour CAVLC/CABAC nnz values; missing
 * neighbours get 0 (CABAC inter) or 64 (otherwise). */
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of neighbours (CABAC context derivation). */
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct: fill mv_cache / ref_cache (and mvd/direct caches for
 * CABAC) from the top, left, top-left and top-right neighbours. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slice direct-mode flags per 8x8 block. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field conversion of cached refs/mvs: the MAP_F2F macro is
 * (re)defined below and applied over this fixed list of cache positions. */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame -> field: refs double (two fields per frame), vertical mv halves. */
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
/* field -> frame: inverse scaling of the same cached values. */
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/*
 * NOTE(review): interior line numbers jump (573 -> 576, 578 -> 580, ...),
 * so loop headers, returns and closing braces of this function are missing
 * from the excerpt.  Code kept byte-identical; comments only.
 *
 * Remaps cached intra4x4 modes when top/left samples are unavailable:
 * a negative table entry means "invalid combination" (error logged),
 * a positive entry substitutes a DC mode that ignores the missing side.
 */
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
/*
 * NOTE(review): lines are elided in this excerpt (612 -> 615, 619 -> 622);
 * the mode-range check, returns and braces are not visible.  Code kept
 * byte-identical; comments only.
 *
 * Same idea as check_intra4x4_pred_mode but for whole-MB (16x16 / chroma
 * 8x8) intra modes, including the MBAFF + constrained_intra_pred special
 * case where only one of the two left field MBs is intra.
 */
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/*
 * NOTE(review): the non-DC return path (presumably "return min;") and the
 * closing brace are elided from this excerpt.  Code kept byte-identical.
 *
 * Predicts the intra4x4 mode of block n as the minimum of the cached left
 * and top neighbour modes; any negative (unavailable) neighbour forces
 * DC_PRED.
 */
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/*
 * NOTE(review): the computation of `i` from left/top (and the final
 * return plus closing brace) is elided from this excerpt.  Code kept
 * byte-identical; comments only.
 *
 * Predicts the number of non-zero coefficients for block n from the
 * cached left/top counts; the `if(i<64)` rounding averages the two when
 * both are real counts (64 is the "unavailable" sentinel written by
 * fill_caches).
 */
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/*
 * NOTE(review): several condition lines, braces and the #undef of
 * SET_DIAG_MV are elided from this excerpt (interior numbers jump,
 * e.g. 703 -> 706, 719 -> 722).  Code kept byte-identical; comments only.
 *
 * Returns the "C" candidate (top-right, falling back to top-left) motion
 * vector and reference for MV prediction of block i in the given list.
 * The MBAFF branches rescale vertical MVs and references when the
 * neighbour MB pair has the opposite frame/field coding (via SET_DIAG_MV).
 */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF fallback: use top-right if available, else top-left. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
/*
 * NOTE(review): the single-match branches (left_ref==ref / diagonal case)
 * and closing braces are elided from this excerpt.  Code kept
 * byte-identical; comments only.
 *
 * Standard H.264 median MV prediction: with more than one neighbour
 * sharing the target reference, the component-wise median of A (left),
 * B (top) and C (diagonal) is used; with exactly one match, that
 * neighbour's MV is used directly.
 */
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* No (or ambiguous) match: only-left special case, else median anyway. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/*
 * NOTE(review): the conditions selecting top (for the upper 16x8 half)
 * vs. left neighbour, the early returns and closing braces are elided
 * from this excerpt.  Code kept byte-identical; comments only.
 *
 * Directional MV prediction for 16x8 partitions: uses the top neighbour
 * for the upper half and the left neighbour for the lower half when that
 * neighbour has the same reference; otherwise falls back to pred_motion.
 */
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction. */
838 pred_motion(h, n, 4, list, ref, mx, my);
/*
 * NOTE(review): the branch conditions, early returns and closing braces
 * are elided from this excerpt.  Code kept byte-identical; comments only.
 *
 * Directional MV prediction for 8x16 partitions: left neighbour for the
 * left half, diagonal (C) candidate for the right half when the
 * references match; otherwise falls back to pred_motion.
 */
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* Fallback: generic median prediction. */
875 pred_motion(h, n, 2, list, ref, mx, my);
/*
 * NOTE(review): the zero-MV assignment inside the first branch, the
 * return, and closing braces are elided from this excerpt.  Code kept
 * byte-identical; comments only.
 *
 * MV prediction for P-skip macroblocks: the MV is zero when either
 * neighbour is unavailable or a zero-MV ref-0 neighbour exists;
 * otherwise the normal median prediction is used.
 */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
/*
 * NOTE(review): the first branch's return (presumably 256 for the
 * "no scaling" case), the else keyword and closing braces are elided
 * from this excerpt.  Code kept byte-identical; comments only.
 *
 * Computes the temporal-direct distance scale factor for reference i of
 * list 0, per H.264 8.4.1.2.3 (tb/td clipped to [-128,127], result
 * clipped to [-1024,1023]).
 */
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/*
 * NOTE(review): the FRAME_MBAFF condition guarding the per-field loop and
 * the closing braces are elided from this excerpt.  Code kept
 * byte-identical; comments only.
 *
 * Precomputes temporal-direct distance scale factors for every list-0
 * reference: per-field tables (for MBAFF) and the frame-level table.
 */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/*
 * NOTE(review): the poc-adjustment branch before line 943, the break, and
 * closing braces are elided from this excerpt.  Code kept byte-identical;
 * comments only.
 *
 * Builds the map from the co-located picture's (ref_list[1][0]) reference
 * indices to the current slice's list-0 indices, matching references by
 * their 4*frame_num + reference-field encoding of poc.
 */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Per-slice setup for direct-mode prediction: records the current picture's
 * reference counts and packed reference keys (4*frame_num + parity) so that
 * future pictures can map their colocated references back to it, then (for
 * temporal direct, i.e. non-spatial B slices) fills the col->list0 maps.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0]; // colocated picture
962 Picture * const cur = s->current_picture_ptr;
964 int sidx= (s->picture_structure&1)^1; // 0 for top/frame, 1 for bottom field
965 int ref1sidx= (ref1->reference&1)^1; // same, for the colocated picture
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3); // packed key, matches fill_colmap()
// Frame pictures: both field slots share the same lists.
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
// Maps are only needed for temporal direct in B slices.
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-macroblock coded in
 * direct mode (H.264 spec 8.4.1.2), filling h->mv_cache / h->ref_cache and
 * updating *mb_type / h->sub_mb_type with the derived partitioning.
 *
 * Two branches: spatial direct (MVs predicted from spatial neighbours, zeroed
 * when the colocated block is a near-static list-0 block) and temporal direct
 * (colocated list-1 MVs scaled by the precomputed dist_scale_factor tables).
 * Interlaced (MBAFF/field) cases additionally remap the colocated macroblock
 * position and vertical MV scale.
 *
 * NOTE(review): this excerpt is missing many interior lines (declarations of
 * mb_type_col/ref/mv/scale/ref_offset etc. and numerous braces); comments
 * below describe only what the visible lines establish.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2]; // colocated list-1 picture's MVs, both lists
997 const int8_t *l1ref0, *l1ref1; // colocated picture's reference indices
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- Select the colocated macroblock and the partition sizes to derive,
 *     depending on the interlacing of the current MB vs. the colocated MB. --- */
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc); // pick the temporally closer field
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3; // +/-1 row to reach the opposite-parity field MB
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; // top MB of the frame MB pair
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* Pointers into the colocated picture's MV / ref-index planes. */
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// Advance to the bottom half of the colocated MB (conditional lines not visible here).
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
/* ================= spatial direct prediction ================= */
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1]; // left neighbour
1075 int refb = h->ref_cache[list][scan8[0] - 8]; // top neighbour
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4]; // top-right neighbour
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1]; // fall back to top-left
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc); // unsigned min maps negative "unavailable" codes above all valid refs
1084 if(ref[0] < 0 && ref[1] < 0){
// No neighbour has a reference in either list: use ref 0 and zero MVs.
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]); // median MV prediction per list
1093 mv[list][0] = mv[list][1] = 0;
// A list without a valid reference contributes nothing: strip its L0/L1 flag.
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
/* Mixed frame/field vs. colocated: per-8x8 derivation with remapped offsets. */
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Zero the MVs when the colocated block is a near-static list-0 block (|mv| <= 1).
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* Whole-MB 16x16 spatial direct. */
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){ // workaround for pre-34 x264 encoder bug
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* Per-8x8 spatial direct (same parity). */
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){ // same x264 bug workaround as above
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){ // 4x4 granularity when 8x8 inference is off
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ================= temporal direct prediction ================= */
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
// Field MBs use the per-parity maps/scale tables built by direct_dist_scale_factor().
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* Mixed interlacing: scale vertical MVs by y_shift to convert field<->frame units. */
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); // temporal direct always uses list-1 ref 0
1222 if(IS_INTRA(mb_type_col[y8])){
// Intra colocated block: zero MVs, ref 0.
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset]; // colocated ref index -> our list-0 index
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset]; // fall back to colocated list 1
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2; // convert vertical MV between field and frame units
1242 int mx = (scale * mv_col[0] + 128) >> 8; // list-0 MV = scaled colocated MV (rounded)
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); // list-1 MV = mvL0 - mvCol per spec
/* Same-parity temporal direct. */
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); // mvL1 = mvL0 - mvCol
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){ // 4x4 granularity when 8x8 inference is off
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies this macroblock's motion data from the per-MB caches
 * (mv_cache / ref_cache / mvd_cache) back into the picture-wide arrays
 * (motion_val, ref_index, mvd_table) and, for CABAC B slices, records the
 * per-8x8 direct flags used for context modelling.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; // 4x4-block coordinates of this MB
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; // 8x8-block coordinates
// List 0 unused: mark ref indices so neighbouring MBs see LIST_NOT_USED.
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
// Copy MVs two 4x4 blocks (one uint64_t) at a time, row by row.
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); // skipped MBs have zero MV deltas
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One reference index per 8x8 block.
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374 * @return pointer to the decoded bytes; may be src+1 when no escape sequences are present
// Parses the one-byte NAL header (nal_ref_idc, nal_unit_type), then removes
// H.264 emulation-prevention bytes: every 0x00 0x00 0x03 sequence in the
// payload has its 0x03 stripped. The common no-escape case returns a pointer
// into src without copying; otherwise the unescaped payload is written into
// h->rbsp_buffer.
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
// (debug dump of the raw NAL bytes)
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
// Scan for a 0x00 0x00 0x0{0..3} pattern; stepping by 2 is safe because any
// such pattern contains a zero at an even or odd offset we will land on.
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
// Fast path: no escape found, return the input buffer directly.
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
// NOTE(review): av_fast_realloc failure (NULL dst) is not visibly checked in
// this excerpt — confirm against the full file.
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
1430 dst[di++]= src[si++];
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 * identifies the exact end of the bitstream
1441 * @return the length of the rbsp trailing bits, or 0 if damaged
// Locates the rbsp_stop_one_bit that terminates the bitstream payload.
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v); // v is the last payload byte (declaration outside this excerpt)
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
// Inverse 4x4 Hadamard transform of the 16 luma DC coefficients followed by
// dequantization by qmul (with >>8 rounding). The x_offset/y_offset tables
// map the 4x4 DC grid onto the DC positions of the 16 4x4 blocks within the
// 16*16-strided coefficient layout.
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
// Horizontal pass: butterflies over each row of DC values into temp[].
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical pass: butterflies over temp[] columns, then dequantize and store
// back to the scattered DC positions.
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
// Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
// mirrors h264_luma_dc_dequant_idct_c but without dequantization — results
// are halved (>>1) instead.
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Horizontal pass into temp[].
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical pass; store halved results back to the scattered DC positions.
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
// Inverse 2x2 Hadamard transform + dequantization of the 4 chroma DC
// coefficients, stored at the DC positions of the 4 chroma 4x4 blocks.
// NOTE(review): the declarations of a..e and the computation of e are not
// visible in this excerpt; from the outputs, e is presumably b+d (the second
// butterfly pair) — confirm against the full file.
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side);
// same layout as chroma_dc_dequant_idct_c but without dequantization.
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1584 * gets the chroma qp.
// Maps a luma qscale to the chroma QP for chroma plane t (0=Cb, 1=Cr) via the
// PPS lookup table (which accounts for chroma_qp_index_offset).
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition from one reference picture (one list).
 * Luma uses quarter-pel interpolation (qpix_op indexed by the fractional MV
 * bits), chroma eighth-pel (chroma_op). Out-of-picture source areas are
 * handled by ff_emulated_edge_mc into the edge_emu_buffer.
 *
 * @param n      index of the partition's first 4x4 block (scan8 order)
 * @param square nonzero when one square qpix_op call covers the partition;
 *               otherwise a second call at +delta completes it
 * @param delta  byte offset of the second luma half (see square)
 */
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594 MpegEncContext * const s = &h->s;
1595 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; // quarter-pel x, absolute
1596 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; // quarter-pel y, absolute
1597 const int luma_xy= (mx&3) + ((my&3)<<2); // fractional bits select the qpel filter
1598 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599 uint8_t * src_cb, * src_cr;
1600 int extra_width= h->emu_edge_width;
1601 int extra_height= h->emu_edge_height;
1603 const int full_mx= mx>>2;
1604 const int full_my= my>>2;
1605 const int pic_width = 16*s->mb_width;
1606 const int pic_height = 16*s->mb_height >> MB_FIELD; // halved for field pictures
// Subpel interpolation reads 2 extra pixels on each side; shrink the safe area.
1608 if(mx&7) extra_width -= 3;
1609 if(my&7) extra_height -= 3;
// Source block (including filter margin) leaves the picture: use edge emulation.
1611 if( full_mx < 0-extra_width
1612 || full_my < 0-extra_height
1613 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1614 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1615 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1616 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1620 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1622 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); // second half of a non-square partition
1625 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return; // grayscale-only decode: skip chroma
1628 // chroma offset when predicting from a field of opposite parity
1629 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1630 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1632 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1633 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637 src_cb= s->edge_emu_buffer;
1639 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); // eighth-pel chroma MC
1642 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1643 src_cr= s->edge_emu_buffer;
1645 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition: predicts from list 0
 * and/or list 1 via mc_dir_part. For bi-prediction the first list uses the
 * "put" functions and the second the "avg" functions, averaging in place.
 */
1648 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1649 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1650 int x_offset, int y_offset,
1651 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1652 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1653 int list0, int list1){
1654 MpegEncContext * const s = &h->s;
1655 qpel_mc_func *qpix_op= qpix_put; // first prediction writes, second averages
1656 h264_chroma_mc_func chroma_op= chroma_put;
// Advance destinations to the partition's position within the macroblock.
1658 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1659 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1660 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
// Make offsets absolute (in chroma-sample units).
1661 x_offset += 8*s->mb_x;
1662 y_offset += 8*(s->mb_y >> MB_FIELD);
1665 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1666 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1667 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1668 qpix_op, chroma_op);
// Switch to averaging for the list-1 prediction (bi-predictive case).
1671 chroma_op= chroma_avg;
1675 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1676 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1677 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition. Bi-predictive partitions
 * render list 1 into a scratchpad and blend the two predictions with either
 * implicit (use_weight==2) or explicit bi-weights; uni-predictive partitions
 * apply the explicit per-reference weight/offset in place.
 */
1682 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1683 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1684 int x_offset, int y_offset,
1685 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1686 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1687 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1688 int list0, int list1){
1689 MpegEncContext * const s = &h->s;
// Advance destinations to the partition's position within the macroblock.
1691 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1692 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1693 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1694 x_offset += 8*s->mb_x;
1695 y_offset += 8*(s->mb_y >> MB_FIELD);
1698 /* don't optimize for luma-only case, since B-frames usually
1699 * use implicit weights => chroma too. */
1700 uint8_t *tmp_cb = s->obmc_scratchpad;
1701 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1702 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1703 int refn0 = h->ref_cache[0][ scan8[n] ];
1704 int refn1 = h->ref_cache[1][ scan8[n] ];
// List 0 directly to the destination, list 1 to the scratchpad.
1706 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1707 dest_y, dest_cb, dest_cr,
1708 x_offset, y_offset, qpix_put, chroma_put);
1709 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1710 tmp_y, tmp_cb, tmp_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
1713 if(h->use_weight == 2){
// Implicit weighting: weights sum to 64, denominator 5, no offset.
1714 int weight0 = h->implicit_weight[refn0][refn1];
1715 int weight1 = 64 - weight0;
1716 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1717 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1718 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit bi-prediction weights/offsets from the slice header.
1720 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1721 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1722 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1723 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1724 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1725 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1726 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1728 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-predictive case: predict, then weight in place.
1731 int list = list1 ? 1 : 0;
1732 int refn = h->ref_cache[list][ scan8[n] ];
1733 Picture *ref= &h->ref_list[list][refn];
1734 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1735 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1736 qpix_put, chroma_put);
1738 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1739 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1740 if(h->use_weight_chroma){
1741 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1742 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1743 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation.
 * Weighted MC is used for explicit weighting (use_weight==1) and for
 * implicit weighting when the weights are not the trivial 32/32 split
 * (which plain averaging already produces).
 */
1749 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1750 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1751 int x_offset, int y_offset,
1752 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1753 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1754 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1755 int list0, int list1){
1756 if((h->use_weight==2 && list0 && list1
1757 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1758 || h->use_weight==1)
1759 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1760 x_offset, y_offset, qpix_put, chroma_put,
1761 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1763 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1764 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1767 static inline void prefetch_motion(H264Context *h, int list){
1768 /* fetch pixels for estimated mv 4 macroblocks ahead
1769 * optimized for 64byte cache lines */
1770 MpegEncContext * const s = &h->s;
1771 const int refn = h->ref_cache[list][scan8[0]];
// Integer-pel position of this MB's first MV, shifted 4 MBs (64 luma pixels) ahead.
1773 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1774 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1775 uint8_t **src= h->ref_list[list][refn].data;
1776 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; // stagger rows by MB x to spread cache lines
1777 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Chroma: cb and cr planes are assumed contiguous, prefetched via their distance.
1778 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1779 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for a whole macroblock: walks the partition tree
 * (16x16, 16x8, 8x16, or 8x8 with 8x8/8x4/4x8/4x4 sub-partitions) and calls
 * mc_part for each partition with the matching interpolation-function and
 * weight-function sizes. Also issues reference-picture prefetches for the
 * next macroblock.
 */
1783 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1784 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1785 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1786 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1787 MpegEncContext * const s = &h->s;
1788 const int mb_xy= h->mb_xy;
1789 const int mb_type= s->current_picture.mb_type[mb_xy];
1791 assert(IS_INTER(mb_type));
1793 prefetch_motion(h, 0);
1795 if(IS_16X16(mb_type)){
1796 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1797 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1798 &weight_op[0], &weight_avg[0],
1799 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1800 }else if(IS_16X8(mb_type)){
// Two 16x8 halves: partition n=0 (top) and n=8 (bottom), delta=8 rows... second half via delta.
1801 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1802 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1803 &weight_op[1], &weight_avg[1],
1804 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1805 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1806 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1807 &weight_op[1], &weight_avg[1],
1808 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1809 }else if(IS_8X16(mb_type)){
// Two 8x16 halves: left (n=0) and right (n=4); delta spans 8 luma rows.
1810 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1811 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1812 &weight_op[2], &weight_avg[2],
1813 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1814 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1815 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1816 &weight_op[2], &weight_avg[2],
1817 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 assert(IS_8X8(mb_type));
// 8x8 mode: each quadrant carries its own sub-partition type.
1824 const int sub_mb_type= h->sub_mb_type[i];
1826 int x_offset= (i&1)<<2;
1827 int y_offset= (i&2)<<1;
1829 if(IS_SUB_8X8(sub_mb_type)){
1830 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[3], &weight_avg[3],
1833 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1834 }else if(IS_SUB_8X4(sub_mb_type)){
1835 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1836 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1837 &weight_op[4], &weight_avg[4],
1838 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1839 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1840 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1841 &weight_op[4], &weight_avg[4],
1842 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1843 }else if(IS_SUB_4X8(sub_mb_type)){
1844 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1845 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1846 &weight_op[5], &weight_avg[5],
1847 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1848 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1849 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1850 &weight_op[5], &weight_avg[5],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 assert(IS_SUB_4X4(sub_mb_type));
1856 int sub_x_offset= x_offset + 2*(j&1);
1857 int sub_y_offset= y_offset + (j&2);
1858 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1859 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860 &weight_op[6], &weight_avg[6],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC decoding tables (coeff_token,
 * total_zeros, run_before and their chroma-DC variants). Each VLC is pointed
 * at preallocated static storage and built with INIT_VLC_USE_NEW_STATIC so no
 * heap allocation occurs.
 */
1870 static av_cold void decode_init_vlc(void){
1871 static int done = 0; // guards against repeated initialization
1878 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1879 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1880 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1881 &chroma_dc_coeff_token_len [0], 1, 1,
1882 &chroma_dc_coeff_token_bits[0], 1, 1,
1883 INIT_VLC_USE_NEW_STATIC);
// Four coeff_token tables (selected by nC range), packed into one static array.
1887 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1888 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1889 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1890 &coeff_token_len [i][0], 1, 1,
1891 &coeff_token_bits[i][0], 1, 1,
1892 INIT_VLC_USE_NEW_STATIC);
1893 offset += coeff_token_vlc_tables_size[i];
1896 * This is a one time safety check to make sure that
1897 * the packed static coeff_token_vlc table sizes
1898 * were initialized correctly.
1900 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1903 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1904 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1905 init_vlc(&chroma_dc_total_zeros_vlc[i],
1906 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1907 &chroma_dc_total_zeros_len [i][0], 1, 1,
1908 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1909 INIT_VLC_USE_NEW_STATIC);
// One total_zeros table per possible total_coeff value (1..15).
1911 for(i=0; i<15; i++){
1912 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1913 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1914 init_vlc(&total_zeros_vlc[i],
1915 TOTAL_ZEROS_VLC_BITS, 16,
1916 &total_zeros_len [i][0], 1, 1,
1917 &total_zeros_bits[i][0], 1, 1,
1918 INIT_VLC_USE_NEW_STATIC);
// run_before tables for zeros-left 1..6; zeros-left >6 uses the shared run7 table.
1922 run_vlc[i].table = run_vlc_tables[i];
1923 run_vlc[i].table_allocated = run_vlc_tables_size;
1924 init_vlc(&run_vlc[i],
1926 &run_len [i][0], 1, 1,
1927 &run_bits[i][0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
1930 run7_vlc.table = run7_vlc_table,
1931 run7_vlc.table_allocated = run7_vlc_table_size;
1932 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1933 &run_len [6][0], 1, 1,
1934 &run_bits[6][0], 1, 1,
1935 INIT_VLC_USE_NEW_STATIC);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the per-thread scratch buffers of every thread context.
 * av_freep() NULLs each pointer, so double-free is avoided.
 */
1939 static void free_tables(H264Context *h){
1942     av_freep(&h->intra4x4_pred_mode);
1943     av_freep(&h->chroma_pred_mode_table);
1944     av_freep(&h->cbp_table);
1945     av_freep(&h->mvd_table[0]);
1946     av_freep(&h->mvd_table[1]);
1947     av_freep(&h->direct_table);
1948     av_freep(&h->non_zero_count);
1949     av_freep(&h->slice_table_base);
        /* slice_table points into slice_table_base (see alloc_tables), so
         * it must be cleared too after the base is freed */
1950     h->slice_table= NULL;
1952     av_freep(&h->mb2b_xy);
1953     av_freep(&h->mb2b8_xy);
        /* per-thread buffers: top borders and motion-compensation scratchpad */
1955     for(i = 0; i < h->s.avctx->thread_count; i++) {
1956         hx = h->thread_context[i];
1958         av_freep(&hx->top_borders[1]);
1959         av_freep(&hx->top_borders[0]);
1960         av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the 8x8 dequantization tables for the two scaling matrices
 * from the active PPS, one entry per qp in [0,51]. If both 8x8
 * scaling matrices are identical, the second table aliases the first
 * instead of being recomputed. Coefficients are stored transposed
 * when a non-C (asm) idct8 is in use.
 */
1964 static void init_dequant8_coeff_table(H264Context *h){
1966     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1967     h->dequant8_coeff[0] = h->dequant8_buffer[0];
1968     h->dequant8_coeff[1] = h->dequant8_buffer[1];
1970     for(i=0; i<2; i++ ){
        /* identical matrices -> share buffer 0 and skip recomputation */
1971         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1972             h->dequant8_coeff[1] = h->dequant8_buffer[0];
1976         for(q=0; q<52; q++){
1977             int shift = div6[q];
1980                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1981                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1982                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the 4x4 dequantization tables for the six scaling matrices
 * from the active PPS, one entry per qp in [0,51]. A matrix equal to
 * an earlier one shares that earlier buffer instead of being
 * recomputed. Coefficients are stored transposed when a non-C (asm)
 * idct is in use.
 */
1987 static void init_dequant4_coeff_table(H264Context *h){
1989     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1990     for(i=0; i<6; i++ ){
1991         h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* reuse the buffer of a previous identical matrix, if any */
1993             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1994                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2001         for(q=0; q<52; q++){
2002             int shift = div6[q] + 2;
2005                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2006                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2007                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequantization tables for the current PPS. The 8x8
 * tables are only needed when 8x8 transforms are enabled. With
 * lossless transform bypass, qp==0 entries are forced to the neutral
 * value 1<<6 so dequantization becomes a no-op.
 */
2012 static void init_dequant_tables(H264Context *h){
2014     init_dequant4_coeff_table(h);
2015     if(h->pps.transform_8x8_mode)
2016         init_dequant8_coeff_table(h);
2017     if(h->sps.transform_bypass){
2020                 h->dequant4_coeff[i][0][x] = 1<<6;
2021         if(h->pps.transform_8x8_mode)
2024                     h->dequant8_coeff[i][0][x] = 1<<6;
2031  * needs width/height
2033 static int alloc_tables(H264Context *h){
2034     MpegEncContext * const s = &h->s;
        /* one extra mb row so edge macroblocks have valid neighbours */
2035     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2038     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2040     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2041     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2042     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2044     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2045     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2046     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2047     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
        /* -1 marks "no slice"; slice_table is offset into the base so that
         * out-of-frame neighbour accesses land on valid (-1) entries */
2049     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2050     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2052     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2053     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
        /* precompute macroblock-index -> 4x4 (b) and 8x8 (b8) index maps */
2054     for(y=0; y<s->mb_height; y++){
2055         for(x=0; x<s->mb_width; x++){
2056             const int mb_xy= x + y*s->mb_stride;
2057             const int b_xy = 4*x + 4*y*h->b_stride;
2058             const int b8_xy= 2*x + 2*y*h->b8_stride;
2060             h->mb2b_xy [mb_xy]= b_xy;
2061             h->mb2b8_xy[mb_xy]= b8_xy;
        /* allocated lazily in frame_start() once linesize is known */
2065     s->obmc_scratchpad = NULL;
2067     if(!h->dequant4_coeff[0])
2068         init_dequant_tables(h);
2077  * Mimic alloc_tables(), but for every context thread.
2079 static void clone_tables(H264Context *dst, H264Context *src){
        /* share (not copy) the big per-frame tables between thread contexts;
         * ownership stays with src, so only src's tables are ever freed */
2080     dst->intra4x4_pred_mode= src->intra4x4_pred_mode;
2081     dst->non_zero_count    = src->non_zero_count;
2082     dst->slice_table       = src->slice_table;
2083     dst->cbp_table         = src->cbp_table;
2084     dst->mb2b_xy           = src->mb2b_xy;
2085     dst->mb2b8_xy          = src->mb2b8_xy;
2086     dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2087     dst->mvd_table[0]      = src->mvd_table[0];
2088     dst->mvd_table[1]      = src->mvd_table[1];
2089     dst->direct_table      = src->direct_table;
        /* per-thread scratchpad is allocated lazily in frame_start() */
2091     dst->s.obmc_scratchpad = NULL;
2092     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2097  * Allocate buffers which are not shared amongst multiple threads.
2099 static int context_init(H264Context *h){
        /* one row of luma(16) + 2x chroma(8) border samples per mb column */
2100     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2101     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2105     return -1; // free_tables will clean up for us
/**
 * Initialization shared by decoder (and encoder) init paths:
 * geometry from the AVCodecContext, prediction function pointers,
 * and flat (all-16) default scaling matrices.
 */
2108 static av_cold void common_init(H264Context *h){
2109     MpegEncContext * const s = &h->s;
2111     s->width = s->avctx->width;
2112     s->height = s->avctx->height;
2113     s->codec_id= s->avctx->codec->id;
2115     ff_h264_pred_init(&h->hpc, s->codec_id);
        /* -1 == "no PPS dequant tables built yet" */
2117     h->dequant_coeff_pps= -1;
2118     s->unrestricted_mv=1;
2119     s->decode=1; //FIXME
        /* default scaling matrices: all 16 == flat (no scaling) */
2121     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2122     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, output
 * format, pixel format (JPEG-range 4:2:0 for SVQ3, normal 4:2:0 for
 * H.264) and initial POC state.
 */
2125 static av_cold int decode_init(AVCodecContext *avctx){
2126     H264Context *h= avctx->priv_data;
2127     MpegEncContext * const s = &h->s;
2129     MPV_decode_defaults(s);
2134     s->out_format = FMT_H264;
2135     s->workaround_bugs= avctx->workaround_bugs;
2138 //    s->decode_mb= ff_h263_decode_mb;
2139     s->quarter_sample = 1;
2142     if(avctx->codec_id == CODEC_ID_SVQ3)
2143         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2145         avctx->pix_fmt= PIX_FMT_YUV420P;
        /* extradata starting with byte 1 indicates avcC (MP4-style) format
         * rather than Annex B start codes */
2149     if(avctx->extradata_size > 0 && avctx->extradata &&
2150        *(char *)avctx->extradata == 1){
2157     h->thread_context[0] = h;
        /* INT_MIN == "nothing output yet" */
2158     h->outputed_poc = INT_MIN;
2159     h->prev_poc_msb= 1<<16;
/**
 * Per-frame setup: start the MPV frame and error resilience, compute
 * the per-block destination offsets for the current linesizes,
 * allocate per-thread scratchpads, and reset reference/POC state on
 * the new current picture.
 */
2163 static int frame_start(H264Context *h){
2164     MpegEncContext * const s = &h->s;
2167     if(MPV_frame_start(s, s->avctx) < 0)
2169     ff_er_frame_start(s);
2171      * MPV_frame_start uses pict_type to derive key_frame.
2172      * This is incorrect for H.264; IDR markings must be used.
2173      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2174      * See decode_nal_units().
2176     s->current_picture_ptr->key_frame= 0;
2178     assert(s->linesize && s->uvlinesize);
        /* block_offset[0..15]: luma 4x4 block offsets for frame coding;
         * block_offset[24..39]: same, with doubled row stride (MBAFF field) */
2180     for(i=0; i<16; i++){
2181         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2182         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
        /* chroma (cb at +16, cr at +20) equivalents of the above */
2185         h->block_offset[16+i]=
2186         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2187         h->block_offset[24+16+i]=
2188         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191     /* can't be in alloc_tables because linesize isn't known there.
2192      * FIXME: redo bipred weight to not require extra buffer? */
2193     for(i = 0; i < s->avctx->thread_count; i++)
2194         if(!h->thread_context[i]->s.obmc_scratchpad)
2195             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2197     /* some macroblocks will be accessed before they're available */
2198     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2199         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2201 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2203     // We mark the current picture as non-reference after allocating it, so
2204     // that if we break out due to an error it can be released automatically
2205     // in the next MPV_frame_start().
2206     // SVQ3 as well as most other codecs have only last/next/current and thus
2207     // get released even with set reference, besides SVQ3 and others do not
2208     // mark frames as reference later "naturally".
2209     if(s->codec_id != CODEC_ID_SVQ3)
2210         s->current_picture_ptr->reference= 0;
        /* field POCs are filled in later from the slice headers */
2212     s->current_picture_ptr->field_poc[0]=
2213     s->current_picture_ptr->field_poc[1]= INT_MAX;
2214     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row and right-edge column of the just-decoded
 * macroblock into h->top_borders[] / h->left_border[] so that intra
 * prediction of the neighbouring macroblocks can use the
 * pre-deblocking samples. Handles both the plain frame case and the
 * MBAFF case (two saved lines: one per macroblock of a pair).
 */
2219 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2220     MpegEncContext * const s = &h->s;
2229         src_cb -= uvlinesize;
2230         src_cr -= uvlinesize;
2232     if(!simple && FRAME_MBAFF){
2234             offset = MB_MBAFF ? 1 : 17;
2235             uvoffset= MB_MBAFF ? 1 : 9;
            /* copy the bottom luma row (two 8-byte halves) ... */
2237                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2238                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2239                 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    /* ... and the bottom chroma rows, unless gray-only decode */
2240                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2241                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2246             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2247             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2248                 h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7  ];
2249                 h->left_border[34+18  ]= h->top_borders[0][s->mb_x][16+8+7];
2255         top_idx = MB_MBAFF ? 0 : 1;
2257     step= MB_MBAFF ? 2 : 1;
2260     // There are two lines saved, the line above the top macroblock of a pair,
2261     // and the line above the bottom macroblock
2262     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2263     for(i=1; i<17 - skiplast; i++){
2264         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2267     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2268     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2270     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2271         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2272         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2273         for(i=1; i<9 - skiplast; i++){
2274             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2275             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2277         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2278         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg!=0) or copy the saved pre-deblocking border samples
 * between h->top_borders[]/h->left_border[] and the picture, so that
 * intra prediction sees undeblocked neighbour samples while the
 * deblocking filter later sees the filtered ones. Called once before
 * and once after intra prediction of a macroblock.
 */
2282 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2283     MpegEncContext * const s = &h->s;
2294     if(!simple && FRAME_MBAFF){
2296             offset = MB_MBAFF ? 1 : 17;
2297             uvoffset= MB_MBAFF ? 1 : 9;
2301         top_idx = MB_MBAFF ? 0 : 1;
2303     step= MB_MBAFF ? 2 : 1;
        /* deblocking_filter==2: only deblock within the same slice */
2306     if(h->deblocking_filter == 2) {
2308         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2309         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2311         deblock_left = (s->mb_x > 0);
2312         deblock_top =  (s->mb_y > !!MB_FIELD);
        /* step back one row/column so [0] addresses the border samples */
2315     src_y  -=   linesize + 1;
2316     src_cb -= uvlinesize + 1;
2317     src_cr -= uvlinesize + 1;
2319 #define XCHG(a,b,t,xchg)\
2326         for(i = !deblock_top; i<16; i++){
2327             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2329         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2333         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2334         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2335         if(s->mb_x+1 < s->mb_width){
2336             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2340     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2342             for(i = !deblock_top; i<8; i++){
2343                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2344                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2346             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2347             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2350             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2351             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock into the current picture: intra
 * prediction or motion compensation, inverse transform + residual
 * add, and deblocking-filter bookkeeping. 'simple' is a compile-time
 * constant (see hl_decode_mb_simple/_complex) that lets the common
 * path skip MBAFF/gray/PCM/SVQ3 handling.
 */
2356 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2357     MpegEncContext * const s = &h->s;
2358     const int mb_x= s->mb_x;
2359     const int mb_y= s->mb_y;
2360     const int mb_xy= h->mb_xy;
2361     const int mb_type= s->current_picture.mb_type[mb_xy];
2362     uint8_t  *dest_y, *dest_cb, *dest_cr;
2363     int linesize, uvlinesize /*dct_offset*/;
2365     int *block_offset = &h->block_offset[0];
2366     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2367     const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2368     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2369     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2371     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2372     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2373     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2375     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2376     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
    /* MBAFF field macroblock: double strides, use the field block
     * offsets, and for the bottom field start one line down */
2378     if (!simple && MB_FIELD) {
2379         linesize   = h->mb_linesize = s->linesize * 2;
2380         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2381         block_offset = &h->block_offset[24];
2382         if(mb_y&1){ //FIXME move out of this function?
2383             dest_y -= s->linesize*15;
2384             dest_cb-= s->uvlinesize*7;
2385             dest_cr-= s->uvlinesize*7;
2389         for(list=0; list<h->list_count; list++){
2390             if(!USES_LIST(mb_type, list))
            /* re-map reference indices to per-field values (parity in bit 0) */
2392             if(IS_16X16(mb_type)){
2393                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2394                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2396                 for(i=0; i<16; i+=4){
2397                     int ref = h->ref_cache[list][scan8[i]];
2399                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2405         linesize   = h->mb_linesize = s->linesize;
2406         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2407 //        dct_offset = s->linesize * 16;
    /* PCM macroblock: raw samples were stored in h->mb, just copy them */
2410     if (!simple && IS_INTRA_PCM(mb_type)) {
2411         for (i=0; i<16; i++) {
2412             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2414         for (i=0; i<8; i++) {
2415             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2416             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2419         if(IS_INTRA(mb_type)){
            /* expose undeblocked neighbour samples for intra prediction */
2420             if(h->deblocking_filter)
2421                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2423             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2424                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2425                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2428             if(IS_INTRA4x4(mb_type)){
2429                 if(simple || !s->encoding){
2430                     if(IS_8x8DCT(mb_type)){
2431                         if(transform_bypass){
2433                             idct_add = s->dsp.add_pixels8;
2435                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2436                             idct_add    = s->dsp.h264_idct8_add;
                        /* 8x8 intra blocks: predict then add residual */
2438                         for(i=0; i<16; i+=4){
2439                             uint8_t * const ptr= dest_y + block_offset[i];
2440                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2441                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2442                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2444                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2445                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2446                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2448                                     if(nnz == 1 && h->mb[i*16])
2449                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2451                                         idct_add   (ptr, h->mb + i*16, linesize);
2456                     if(transform_bypass){
2458                         idct_add = s->dsp.add_pixels4;
2460                         idct_dc_add = s->dsp.h264_idct_dc_add;
2461                         idct_add    = s->dsp.h264_idct_add;
                    /* 4x4 intra blocks */
2463                     for(i=0; i<16; i++){
2464                         uint8_t * const ptr= dest_y + block_offset[i];
2465                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2467                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2468                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            /* these modes need the top-right samples; if
                             * unavailable, replicate the rightmost top sample */
2472                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2473                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2474                                 assert(mb_y || linesize <= block_offset[i]);
2475                                 if(!topright_avail){
2476                                     tr= ptr[3 - linesize]*0x01010101;
2477                                     topright= (uint8_t*) &tr;
2479                                     topright= ptr + 4 - linesize;
2483                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2484                             nnz = h->non_zero_count_cache[ scan8[i] ];
2487                                     if(nnz == 1 && h->mb[i*16])
2488                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2490                                         idct_add   (ptr, h->mb + i*16, linesize);
2492                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
            /* 16x16 intra: whole-mb prediction, then DC transform */
2499                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2501                     if(!transform_bypass)
2502                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2504                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2506             if(h->deblocking_filter)
2507                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        /* inter macroblock: motion compensation */
2509             hl_motion(h, dest_y, dest_cb, dest_cr,
2510                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2511                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2512                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        /* add the luma residual (intra4x4 already added it above) */
2516         if(!IS_INTRA4x4(mb_type)){
2518                 if(IS_INTRA16x16(mb_type)){
2519                     if(transform_bypass){
2520                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2521                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2523                             for(i=0; i<16; i++){
2524                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2525                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2529                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2531                 }else if(h->cbp&15){
2532                     if(transform_bypass){
2533                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2534                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2535                         for(i=0; i<16; i+=di){
2536                             if(h->non_zero_count_cache[ scan8[i] ]){
2537                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2541                         if(IS_8x8DCT(mb_type)){
2542                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2544                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
            /* SVQ3 path */
2549                 for(i=0; i<16; i++){
2550                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2551                         uint8_t * const ptr= dest_y + block_offset[i];
2552                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
        /* chroma residual (cbp bits 4-5 signal coded chroma) */
2558         if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2559             uint8_t *dest[2] = {dest_cb, dest_cr};
2560             if(transform_bypass){
2561                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2562                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2563                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2565                     idct_add = s->dsp.add_pixels4;
2566                     for(i=16; i<16+8; i++){
2567                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2568                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2572                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2573                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2575                     idct_add = s->dsp.h264_idct_add;
2576                     idct_dc_add = s->dsp.h264_idct_dc_add;
2577                     for(i=16; i<16+8; i++){
2578                         if(h->non_zero_count_cache[ scan8[i] ])
2579                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2580                         else if(h->mb[i*16])
2581                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584                     for(i=16; i<16+8; i++){
2585                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2586                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2587                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
    /* finally, save borders and run the in-loop deblocking filter */
2594     if(h->deblocking_filter) {
2595         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2596         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2597         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2598         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2599         if (!simple && FRAME_MBAFF) {
2600             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2602             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2608  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2610 static void hl_decode_mb_simple(H264Context *h){
        /* simple=1 is compile-time constant; the always-inline body drops
         * the MBAFF/gray/PCM/SVQ3 branches */
2611     hl_decode_mb_internal(h, 1);
2615  * Process a macroblock; this handles edge cases, such as interlacing.
2617 static void av_noinline hl_decode_mb_complex(H264Context *h){
        /* simple=0: full path including MBAFF, gray, PCM and SVQ3 handling */
2618     hl_decode_mb_internal(h, 0);
/**
 * Dispatch reconstruction of the current macroblock to the fast
 * (simple) or full (complex) path, based on per-frame/per-mb state.
 */
2621 static void hl_decode_mb(H264Context *h){
2622     MpegEncContext * const s = &h->s;
2623     const int mb_xy= h->mb_xy;
2624     const int mb_type= s->current_picture.mb_type[mb_xy];
        /* qscale==0 may mean lossless bypass, which the simple path skips */
2625     int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2627     if(ENABLE_H264_ENCODER && !s->decode)
2631         hl_decode_mb_complex(h);
2632     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture, in place, into a single-field view of the
 * given parity: double the linesizes, offset the data pointers for the
 * bottom field, restrict 'reference' to the parity, and take the POC
 * of that field.
 */
2635 static void pic_as_field(Picture *pic, const int parity){
2637     for (i = 0; i < 4; ++i) {
2638         if (parity == PICT_BOTTOM_FIELD)
2639             pic->data[i] += pic->linesize[i];
2640         pic->reference = parity;
2641         pic->linesize[i] *= 2;
2643     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy 'src' into 'dest' if 'src' is referenced with the requested
 * parity, converting it to a field picture when parity is a single
 * field and bumping pic_id by id_add.
 * @return non-zero iff the copy was made
 */
2646 static int split_field_copy(Picture *dest, Picture *src,
2647                             int parity, int id_add){
2648     int match = !!(src->reference & parity);
2652         if(parity != PICT_FRAME){
2653             pic_as_field(dest, parity);
2655             dest->pic_id += id_add;
/**
 * Build part of a default reference list from 'in', interleaving
 * fields: same-parity ('sel') fields first at each step, opposite
 * parity second. pic_id is the long-term index or frame_num depending
 * on is_long.
 * @return the number of entries written to 'def'
 */
2662 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2666     while(i[0]<len || i[1]<len){
        /* advance each cursor past pictures not referenced with its parity */
2667         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2669         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2672             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2673             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2676             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2677             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append to 'sorted' the pictures from 'src' whose POC is beyond
 * 'limit', in POC order: descending below the limit when dir==0,
 * ascending above it when dir!=0 (selection-sort, one pick per pass).
 * @return number of pictures appended
 */
2684 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2689         best_poc= dir ? INT_MIN : INT_MAX;
2691         for(i=0; i<len; i++){
2692             const int poc= src[i]->poc;
2693             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2695                 sorted[out_i]= src[i];
        /* no candidate found this pass -> done */
2698         if(best_poc == (dir ? INT_MIN : INT_MAX))
2700         limit= sorted[out_i++]->poc - dir;
2706  * fills the default_ref_list.
2708 static int fill_default_ref_list(H264Context *h){
2709     MpegEncContext * const s = &h->s;
2712     if(h->slice_type_nos==FF_B_TYPE){
2713         Picture *sorted[32];
2718             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2720             cur_poc= s->current_picture_ptr->poc;
        /* B slices: list0 = closest past then future refs, list1 = the
         * reverse; long-term refs are appended after the short-term ones */
2722         for(list= 0; list<2; list++){
2723             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2724             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2726             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2727             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2730             if(len < h->ref_count[list])
2731                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
        /* if both lists start identically, the spec requires the first two
         * entries of list1 to be swapped */
2735         if(lens[0] == lens[1] && lens[1] > 1){
2736             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2738                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
        /* P slices: short-term refs (most recent first) then long-term */
2741         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2742         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2744         if(len < h->ref_count[0])
2745             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2748     for (i=0; i<h->ref_count[0]; i++) {
2749         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2751     if(h->slice_type_nos==FF_B_TYPE){
2752         for (i=0; i<h->ref_count[1]; i++) {
2753             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2760 static void print_short_term(H264Context *h);
2761 static void print_long_term(H264Context *h);
2764  * Extract structure information about the picture described by pic_num in
2765  * the current decoding context (frame or field). Note that pic_num is
2766  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2767  * @param pic_num picture number for which to extract structure information
2768  * @param structure one of PICT_XXX describing structure of picture
2770  * @return frame number (short term) or long term index of picture
2771  *         described by pic_num
2773 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2774     MpegEncContext * const s = &h->s;
2776     *structure = s->picture_structure;
2779             /* opposite field */
2780             *structure ^= PICT_FRAME;
/**
 * Parse the ref_pic_list_reordering() syntax from the slice header and
 * apply it: start from the default lists, then for each reordering
 * command move the named short-term or long-term picture to the
 * current index. Missing entries are patched with the current picture.
 * @return 0 on success, -1 on bitstream error
 */
2787 static int decode_ref_pic_list_reordering(H264Context *h){
2788     MpegEncContext * const s = &h->s;
2789     int list, index, pic_structure;
2791     print_short_term(h);
2794     for(list=0; list<h->list_count; list++){
2795         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
        /* ref_pic_list_reordering_flag */
2797         if(get_bits1(&s->gb)){
2798             int pred= h->curr_pic_num;
2800             for(index=0; ; index++){
2801                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2802                 unsigned int pic_id;
2804                 Picture *ref = NULL;
                /* idc==3 terminates the reordering loop */
2806                 if(reordering_of_pic_nums_idc==3)
2809                 if(index >= h->ref_count[list]){
2810                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2814                 if(reordering_of_pic_nums_idc<3){
2815                     if(reordering_of_pic_nums_idc<2){
                        /* idc 0/1: short-term, signed delta from pred */
2816                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2819                         if(abs_diff_pic_num > h->max_pic_num){
2820                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2824                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2825                         else                                pred+= abs_diff_pic_num;
2826                         pred &= h->max_pic_num - 1;
2828                         frame_num = pic_num_extract(h, pred, &pic_structure);
2830                         for(i= h->short_ref_count-1; i>=0; i--){
2831                             ref = h->short_ref[i];
2832                             assert(ref->reference);
2833                             assert(!ref->long_ref);
2835                                 ref->frame_num == frame_num &&
2836                                 (ref->reference & pic_structure)
                        /* idc 2: long-term reference by index */
2844                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2846                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2849                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2852                         ref = h->long_ref[long_idx];
2853                         assert(!(ref && !ref->reference));
2854                         if(ref && (ref->reference & pic_structure)){
2855                             ref->pic_id= pic_id;
2856                             assert(ref->long_ref);
2864                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2865                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                        /* shift entries down to make room at 'index' */
2867                         for(i=index; i+1<h->ref_count[list]; i++){
2868                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2871                         for(; i > index; i--){
2872                             h->ref_list[list][i]= h->ref_list[list][i-1];
2874                         h->ref_list[list][index]= *ref;
2876                             pic_as_field(&h->ref_list[list][index], pic_structure);
2880                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
    /* fill any holes with the current picture to avoid NULL derefs */
2886     for(list=0; list<h->list_count; list++){
2887         for(index= 0; index < h->ref_count[list]; index++){
2888             if(!h->ref_list[list][index].data[0]){
2889                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2890                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF decoding, derive per-field reference entries from each
 * frame reference: ref_list[list][16+2*i] / [16+2*i+1] become the
 * top/bottom field views of frame ref i, and the per-field weighted
 * prediction parameters are duplicated from the frame ones.
 */
2898 static void fill_mbaff_ref_list(H264Context *h){
2900     for(list=0; list<2; list++){ //FIXME try list_count
2901         for(i=0; i<h->ref_count[list]; i++){
2902             Picture *frame = &h->ref_list[list][i];
2903             Picture *field = &h->ref_list[list][16+2*i];
2906                 field[0].linesize[j] <<= 1;
2907             field[0].reference = PICT_TOP_FIELD;
2908             field[0].poc= field[0].field_poc[0];
            /* bottom field: same as top but offset one line down */
2909             field[1] = field[0];
2911                 field[1].data[j] += frame->linesize[j];
2912             field[1].reference = PICT_BOTTOM_FIELD;
2913             field[1].poc= field[1].field_poc[1];
2915             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2916             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2918                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2919                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2923     for(j=0; j<h->ref_count[1]; j++){
2924         for(i=0; i<h->ref_count[0]; i++)
2925             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2926         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2927         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the pred_weight_table() syntax from the slice header:
 * luma/chroma log2 weight denominators, then per-reference explicit
 * weights and offsets (defaults used when the per-ref flag is 0).
 * Sets h->use_weight / h->use_weight_chroma accordingly.
 */
2931 static int pred_weight_table(H264Context *h){
2932     MpegEncContext * const s = &h->s;
2934     int luma_def, chroma_def;
2937     h->use_weight_chroma= 0;
2938     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2939     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
        /* default weight is 1.0 in fixed point, offset 0 */
2940     luma_def = 1<<h->luma_log2_weight_denom;
2941     chroma_def = 1<<h->chroma_log2_weight_denom;
2943     for(list=0; list<2; list++){
2944         for(i=0; i<h->ref_count[list]; i++){
2945             int luma_weight_flag, chroma_weight_flag;
2947             luma_weight_flag= get_bits1(&s->gb);
2948             if(luma_weight_flag){
2949                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2950                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
                /* only a non-default weight/offset turns weighting on */
2951                 if(   h->luma_weight[list][i] != luma_def
2952                    || h->luma_offset[list][i] != 0)
2955                 h->luma_weight[list][i]= luma_def;
2956                 h->luma_offset[list][i]= 0;
2960                 chroma_weight_flag= get_bits1(&s->gb);
2961                 if(chroma_weight_flag){
2964                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2965                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2966                         if(   h->chroma_weight[list][i][j] != chroma_def
2967                            || h->chroma_offset[list][i][j] != 0)
2968                             h->use_weight_chroma= 1;
2973                         h->chroma_weight[list][i][j]= chroma_def;
2974                         h->chroma_offset[list][i][j]= 0;
        /* only B slices have a second list */
2979         if(h->slice_type_nos != FF_B_TYPE) break;
2981     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute the implicit bi-prediction weight table from the POC
 * distances of each (ref0, ref1) pair, per the H.264 implicit
 * weighted-prediction derivation; falls back to equal weights (32/32)
 * for degenerate distances.
 */
2985 static void implicit_weight_table(H264Context *h){
2986     MpegEncContext * const s = &h->s;
2988     int cur_poc = s->current_picture_ptr->poc;
    /* single symmetric ref pair -> no weighting needed */
2990     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
2991        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2993         h->use_weight_chroma= 0;
2998     h->use_weight_chroma= 2;
2999     h->luma_log2_weight_denom= 5;
3000     h->chroma_log2_weight_denom= 5;
3002     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3003         int poc0 = h->ref_list[0][ref0].poc;
3004         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3005             int poc1 = h->ref_list[1][ref1].poc;
3006             int td = av_clip(poc1 - poc0, -128, 127);
3008                 int tb = av_clip(cur_poc - poc0, -128, 127);
3009                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3010                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
                /* out-of-range scale factor -> equal weights */
3011                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3012                     h->implicit_weight[ref0][ref1] = 32;
3014                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3016                 h->implicit_weight[ref0][ref1] = 32;
3022 * Mark a picture as no longer needed for reference. The refmask
3023 * argument allows unreferencing of individual fields or the whole frame.
3024 * If the picture becomes entirely unreferenced, but is being held for
3025 * display purposes, it is marked as such.
3026 * @param refmask mask of fields to unreference; the mask is bitwise
3027 * anded with the reference marking of pic
3028 * @return non-zero if pic becomes entirely unreferenced (except possibly
3029 * for display purposes) zero if one of the fields remains in
3032 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// a non-zero result after masking means some field is still referenced
3034 if (pic->reference &= refmask) {
// fully unreferenced: keep alive only if still pending output (delayed pic)
3037 for(i = 0; h->delayed_pic[i]; i++)
3038 if(pic == h->delayed_pic[i]){
3039 pic->reference=DELAYED_PIC_REF;
3047 * instantaneous decoder refresh.
/* Clears all long- and short-term references and resets frame_num state,
 * as required when an IDR slice is decoded. */
3049 static void idr(H264Context *h){
3052 for(i=0; i<16; i++){
3053 remove_long(h, i, 0);
3055 assert(h->long_ref_count==0);
3057 for(i=0; i<h->short_ref_count; i++){
3058 unreference_pic(h, h->short_ref[i], 0);
3059 h->short_ref[i]= NULL;
3061 h->short_ref_count=0;
3062 h->prev_frame_num= 0;
3063 h->prev_frame_num_offset= 0;
3068 /* forget old pics after a seek */
3069 static void flush_dpb(AVCodecContext *avctx){
3070 H264Context *h= avctx->priv_data;
// drop all pictures held only for delayed output
3072 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3073 if(h->delayed_pic[i])
3074 h->delayed_pic[i]->reference= 0;
3075 h->delayed_pic[i]= NULL;
// reset output-ordering state so the next POC is accepted unconditionally
3077 h->outputed_poc= INT_MIN;
3079 if(h->s.current_picture_ptr)
3080 h->s.current_picture_ptr->reference= 0;
3081 h->s.first_field= 0;
// also flush the generic MPEG layer (picture buffers etc.)
3082 ff_mpeg_flush(avctx);
3086 * Find a Picture in the short term reference list by frame number.
3087 * @param frame_num frame number to search for
3088 * @param idx the index into h->short_ref where returned picture is found
3089 * undefined if no picture found.
3090 * @return pointer to the found picture, or NULL if no pic with the provided
3091 * frame number is found
3093 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3094 MpegEncContext * const s = &h->s;
// linear scan; short_ref_count is bounded by the DPB size
3097 for(i=0; i<h->short_ref_count; i++){
3098 Picture *pic= h->short_ref[i];
3099 if(s->avctx->debug&FF_DEBUG_MMCO)
3100 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3101 if(pic->frame_num == frame_num) {
3110 * Remove a picture from the short term reference list by its index in
3111 * that list. This does no checking on the provided index; it is assumed
3112 * to be valid. Other list entries are shifted down.
3113 * @param i index into h->short_ref of picture to remove.
3115 static void remove_short_at_index(H264Context *h, int i){
3116 assert(i >= 0 && i < h->short_ref_count);
3117 h->short_ref[i]= NULL;
// compact the list; the moved tail keeps short_ref densely packed
3118 if (--h->short_ref_count)
3119 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Remove (or partially unreference, per ref_mask) the short-term reference
 * picture with the given frame number. */
3124 * @return the removed picture or NULL if an error occurs
3126 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3127 MpegEncContext * const s = &h->s;
3131 if(s->avctx->debug&FF_DEBUG_MMCO)
3132 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3134 pic = find_short(h, frame_num, &i);
// only drop the list entry once no field of the picture remains referenced
3136 if(unreference_pic(h, pic, ref_mask))
3137 remove_short_at_index(h, i);
3144 * Remove a picture from the long term reference list by its index in
/* (or partially unreference it, per ref_mask). */
3146 * @return the removed picture or NULL if an error occurs
3148 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3151 pic= h->long_ref[i];
// clear the slot only when the picture became fully unreferenced
3153 if(unreference_pic(h, pic, ref_mask)){
3154 assert(h->long_ref[i]->long_ref == 1);
3155 h->long_ref[i]->long_ref= 0;
3156 h->long_ref[i]= NULL;
3157 h->long_ref_count--;
3165 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3167 static void print_short_term(H264Context *h) {
3169 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3170 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3171 for(i=0; i<h->short_ref_count; i++){
3172 Picture *pic= h->short_ref[i];
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3179 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3181 static void print_long_term(H264Context *h) {
3183 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3184 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3185 for(i = 0; i < 16; i++){
3186 Picture *pic= h->long_ref[i];
3188 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3195 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO list (or the implicit sliding-window behavior)
 * to the short/long-term reference lists; see H.264 spec 8.2.5. */
3197 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3198 MpegEncContext * const s = &h->s;
3200 int current_ref_assigned=0;
3203 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3206 for(i=0; i<mmco_count; i++){
3207 int structure, frame_num;
3208 if(s->avctx->debug&FF_DEBUG_MMCO)
3209 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// resolve the short-term target picture up front for both opcodes
3211 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3212 || mmco[i].opcode == MMCO_SHORT2LONG){
3213 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3214 pic = find_short(h, frame_num, &j);
3216 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3217 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3218 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3223 switch(mmco[i].opcode){
3224 case MMCO_SHORT2UNUSED:
3225 if(s->avctx->debug&FF_DEBUG_MMCO)
3226 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3227 remove_short(h, frame_num, structure ^ PICT_FRAME);
3229 case MMCO_SHORT2LONG:
// evict any different picture occupying the target long-term slot
3230 if (h->long_ref[mmco[i].long_arg] != pic)
3231 remove_long(h, mmco[i].long_arg, 0);
3233 remove_short_at_index(h, j);
3234 h->long_ref[ mmco[i].long_arg ]= pic;
3235 if (h->long_ref[ mmco[i].long_arg ]){
3236 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3237 h->long_ref_count++;
3240 case MMCO_LONG2UNUSED:
3241 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3242 pic = h->long_ref[j];
3244 remove_long(h, j, structure ^ PICT_FRAME);
3245 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3246 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3249 // Comment below left from previous code as it is an interesting note.
3250 /* First field in pair is in short term list or
3251 * at a different long term index.
3252 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3253 * Report the problem and keep the pair where it is,
3254 * and mark this field valid.
// MMCO_LONG: assign the current picture to the given long-term slot
3257 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3258 remove_long(h, mmco[i].long_arg, 0);
3260 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3261 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3262 h->long_ref_count++;
3265 s->current_picture_ptr->reference |= s->picture_structure;
3266 current_ref_assigned=1;
3268 case MMCO_SET_MAX_LONG:
3269 assert(mmco[i].long_arg <= 16);
3270 // just remove the long term which index is greater than new max
3271 for(j = mmco[i].long_arg; j<16; j++){
3272 remove_long(h, j, 0);
// MMCO_RESET: drop all references (IDR-like reset mid-stream)
3276 while(h->short_ref_count){
3277 remove_short(h, h->short_ref[0]->frame_num, 0);
3279 for(j = 0; j < 16; j++) {
3280 remove_long(h, j, 0);
3282 s->current_picture_ptr->poc=
3283 s->current_picture_ptr->field_poc[0]=
3284 s->current_picture_ptr->field_poc[1]=
3288 s->current_picture_ptr->frame_num= 0;
3294 if (!current_ref_assigned) {
3295 /* Second field of complementary field pair; the first field of
3296 * which is already referenced. If short referenced, it
3297 * should be first entry in short_ref. If not, it must exist
3298 * in long_ref; trying to put it on the short list here is an
3299 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3301 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3302 /* Just mark the second field valid */
3303 s->current_picture_ptr->reference = PICT_FRAME;
3304 } else if (s->current_picture_ptr->long_ref) {
3305 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3306 "assignment for second field "
3307 "in complementary field pair "
3308 "(first field is long term)\n");
3310 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3312 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// insert the current picture at the head of the short-term list
3315 if(h->short_ref_count)
3316 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3318 h->short_ref[0]= s->current_picture_ptr;
3319 h->short_ref_count++;
3320 s->current_picture_ptr->reference |= s->picture_structure;
3324 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3326 /* We have too many reference frames, probably due to corrupted
3327 * stream. Need to discard one frame. Prevents overrun of the
3328 * short_ref and long_ref buffers.
3330 av_log(h->s.avctx, AV_LOG_ERROR,
3331 "number of reference frames exceeds max (probably "
3332 "corrupt input), discarding one\n");
3334 if (h->long_ref_count && !h->short_ref_count) {
3335 for (i = 0; i < 16; ++i)
3340 remove_long(h, i, 0);
// otherwise drop the oldest short-term reference (list tail)
3342 pic = h->short_ref[h->short_ref_count - 1];
3343 remove_short(h, pic->frame_num, 0);
3347 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header: IDR flags, or the
 * adaptive MMCO opcode list; otherwise synthesizes sliding-window MMCOs
 * when the reference buffer is full.
 */
3352 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3353 MpegEncContext * const s = &h->s;
3357 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag; broken_link derives from the second bit
3358 s->broken_link= get_bits1(gb) -1;
3360 h->mmco[0].opcode= MMCO_LONG;
3361 h->mmco[0].long_arg= 0;
3365 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3366 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3367 MMCOOpcode opcode= get_ue_golomb(gb);
3369 h->mmco[i].opcode= opcode;
3370 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num
3371 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3372 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3373 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3377 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3378 unsigned int long_arg= get_ue_golomb(gb);
3379 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3383 h->mmco[i].long_arg= long_arg;
3386 if(opcode > (unsigned)MMCO_LONG){
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3390 if(opcode == MMCO_END)
3395 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// sliding window: when the DPB reference budget is exhausted, emit an
// implicit SHORT2UNUSED for the oldest short-term reference
3397 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3398 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3399 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3400 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// in field mode each frame contributes two field pic nums
3402 if (FIELD_PICTURE) {
3403 h->mmco[0].short_pic_num *= 2;
3404 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3405 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) for the current picture per
 * H.264 spec 8.2.1, handling all three poc_type modes:
 *   0 - explicit lsb with msb wraparound tracking,
 *   1 - expected POC derived from the SPS poc cycle,
 *   2 - POC tied directly to decoding order (frame_num).
 */
3415 static int init_poc(H264Context *h){
3416 MpegEncContext * const s = &h->s;
3417 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3419 Picture *cur = s->current_picture_ptr;
3421 h->frame_num_offset= h->prev_frame_num_offset;
// frame_num wrapped around since the previous picture
3422 if(h->frame_num < h->prev_frame_num)
3423 h->frame_num_offset += max_frame_num;
3425 if(h->sps.poc_type==0){
3426 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// detect lsb wraparound in either direction and adjust the msb
3428 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3429 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3430 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3431 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3433 h->poc_msb = h->prev_poc_msb;
3434 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3436 field_poc[1] = h->poc_msb + h->poc_lsb;
3437 if(s->picture_structure == PICT_FRAME)
3438 field_poc[1] += h->delta_poc_bottom;
3439 }else if(h->sps.poc_type==1){
3440 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3443 if(h->sps.poc_cycle_length != 0)
3444 abs_frame_num = h->frame_num_offset + h->frame_num;
3448 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3451 expected_delta_per_poc_cycle = 0;
3452 for(i=0; i < h->sps.poc_cycle_length; i++)
3453 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3455 if(abs_frame_num > 0){
3456 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3457 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3459 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3460 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3461 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3465 if(h->nal_ref_idc == 0)
3466 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3468 field_poc[0] = expectedpoc + h->delta_poc[0];
3469 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3471 if(s->picture_structure == PICT_FRAME)
3472 field_poc[1] += h->delta_poc[1];
// poc_type==2: POC follows decoding order directly
3474 int poc= 2*(h->frame_num_offset + h->frame_num);
// only store the field POC(s) that belong to the current picture structure
3483 if(s->picture_structure != PICT_BOTTOM_FIELD)
3484 s->current_picture_ptr->field_poc[0]= field_poc[0];
3485 if(s->picture_structure != PICT_TOP_FIELD)
3486 s->current_picture_ptr->field_poc[1]= field_poc[1];
3487 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3494 * initialize scan tables
/* Builds the per-context zigzag/field scan tables. When a non-C (SIMD)
 * IDCT is in use its coefficient permutation is folded into the scan
 * tables via the T() transposition macros. */
3496 static void init_scan_tables(H264Context *h){
3497 MpegEncContext * const s = &h->s;
3499 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3500 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3501 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3503 for(i=0; i<16; i++){
// permute 4x4 scan entries for the optimized IDCT's coefficient order
3504 #define T(x) (x>>2) | ((x<<2) & 0xF)
3505 h->zigzag_scan[i] = T(zigzag_scan[i]);
3506 h-> field_scan[i] = T( field_scan[i]);
3510 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3511 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3512 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3513 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3514 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3516 for(i=0; i<64; i++){
// same idea for the 8x8 transform scans
3517 #define T(x) (x>>3) | ((x&7)<<3)
3518 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3519 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3520 h->field_scan8x8[i] = T(field_scan8x8[i]);
3521 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// qp==0 (lossless transform-bypass) blocks must use the unpermuted scans
3525 if(h->sps.transform_bypass){ //FIXME same ugly
3526 h->zigzag_scan_q0 = zigzag_scan;
3527 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3528 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3529 h->field_scan_q0 = field_scan;
3530 h->field_scan8x8_q0 = field_scan8x8;
3531 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3533 h->zigzag_scan_q0 = h->zigzag_scan;
3534 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3535 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3536 h->field_scan_q0 = h->field_scan;
3537 h->field_scan8x8_q0 = h->field_scan8x8;
3538 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3543 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-picture decoding state a slice thread needs:
 * current picture pointers, POC/frame_num history, reference lists and
 * dequant tables. Pointer members are shared, not duplicated. */
3545 static void clone_slice(H264Context *dst, H264Context *src)
3547 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3548 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3549 dst->s.current_picture = src->s.current_picture;
3550 dst->s.linesize = src->s.linesize;
3551 dst->s.uvlinesize = src->s.uvlinesize;
3552 dst->s.first_field = src->s.first_field;
3554 dst->prev_poc_msb = src->prev_poc_msb;
3555 dst->prev_poc_lsb = src->prev_poc_lsb;
3556 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3557 dst->prev_frame_num = src->prev_frame_num;
3558 dst->short_ref_count = src->short_ref_count;
3560 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3561 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3562 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3563 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3565 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3566 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3570 * decodes a slice header.
3571 * This will also call MPV_common_init() and frame_start() as needed.
3573 * @param h h264context
3574 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3576 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3578 static int decode_slice_header(H264Context *h, H264Context *h0){
3579 MpegEncContext * const s = &h->s;
3580 MpegEncContext * const s0 = &h0->s;
3581 unsigned int first_mb_in_slice;
3582 unsigned int pps_id;
3583 int num_ref_idx_active_override_flag;
3584 unsigned int slice_type, tmp, i, j;
3585 int default_ref_list_done = 0;
3586 int last_pic_structure;
// non-reference slices may be dropped without corrupting later pictures
3588 s->dropable= h->nal_ref_idc == 0;
3590 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3591 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3592 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3594 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3595 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3598 first_mb_in_slice= get_ue_golomb(&s->gb);
3600 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3601 h0->current_slice = 0;
3602 if (!s0->first_field)
3603 s->current_picture_ptr= NULL;
3606 slice_type= get_ue_golomb(&s->gb);
3608 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "same type for whole picture" (fixed)
3613 h->slice_type_fixed=1;
3615 h->slice_type_fixed=0;
3617 slice_type= golomb_to_pict_type[ slice_type ];
3618 if (slice_type == FF_I_TYPE
3619 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3620 default_ref_list_done = 1;
3622 h->slice_type= slice_type;
// slice_type_nos folds SI/SP into I/P (no-switching variant)
3623 h->slice_type_nos= slice_type & 3;
3625 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3626 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3627 av_log(h->s.avctx, AV_LOG_ERROR,
3628 "B picture before any references, skipping\n");
3632 pps_id= get_ue_golomb(&s->gb);
3633 if(pps_id>=MAX_PPS_COUNT){
3634 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3637 if(!h0->pps_buffers[pps_id]) {
3638 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3641 h->pps= *h0->pps_buffers[pps_id];
3643 if(!h0->sps_buffers[h->pps.sps_id]) {
3644 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3647 h->sps = *h0->sps_buffers[h->pps.sps_id];
// rebuild dequant tables only on the master context and only when the PPS changed
3649 if(h == h0 && h->dequant_coeff_pps != pps_id){
3650 h->dequant_coeff_pps = pps_id;
3651 init_dequant_tables(h);
3654 s->mb_width= h->sps.mb_width;
3655 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3657 h->b_stride= s->mb_width*4;
3658 h->b8_stride= s->mb_width*2;
// apply SPS cropping to derive the displayed dimensions
3660 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3661 if(h->sps.frame_mbs_only_flag)
3662 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3664 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3666 if (s->context_initialized
3667 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3669 return -1; // width / height changed during parallelized decoding
3671 flush_dpb(s->avctx);
3674 if (!s->context_initialized) {
3676 return -1; // we cant (re-)initialize context during parallel decoding
3677 if (MPV_common_init(s) < 0)
3681 init_scan_tables(h);
// set up one H264Context per slice thread, sharing the MpegEncContext part
3684 for(i = 1; i < s->avctx->thread_count; i++) {
3686 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3687 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3688 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3691 init_scan_tables(c);
3695 for(i = 0; i < s->avctx->thread_count; i++)
3696 if(context_init(h->thread_context[i]) < 0)
3699 s->avctx->width = s->width;
3700 s->avctx->height = s->height;
3701 s->avctx->sample_aspect_ratio= h->sps.sar;
3702 if(!s->avctx->sample_aspect_ratio.den)
3703 s->avctx->sample_aspect_ratio.den = 1;
3705 if(h->sps.timing_info_present_flag){
3706 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// workaround for a timestamp bug in old x264 builds
3707 if(h->x264_build > 0 && h->x264_build < 44)
3708 s->avctx->time_base.den *= 2;
3709 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3710 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3714 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3717 h->mb_aff_frame = 0;
3718 last_pic_structure = s0->picture_structure;
3719 if(h->sps.frame_mbs_only_flag){
3720 s->picture_structure= PICT_FRAME;
3722 if(get_bits1(&s->gb)) { //field_pic_flag
3723 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3725 s->picture_structure= PICT_FRAME;
3726 h->mb_aff_frame = h->sps.mb_aff;
3729 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3731 if(h0->current_slice == 0){
// conceal frame_num gaps by generating dummy reference frames
3732 while(h->frame_num != h->prev_frame_num &&
3733 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3734 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3736 h->prev_frame_num++;
3737 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3738 s->current_picture_ptr->frame_num= h->prev_frame_num;
3739 execute_ref_pic_marking(h, NULL, 0);
3742 /* See if we have a decoded first field looking for a pair... */
3743 if (s0->first_field) {
3744 assert(s0->current_picture_ptr);
3745 assert(s0->current_picture_ptr->data[0]);
3746 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3748 /* figure out if we have a complementary field pair */
3749 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3751 * Previous field is unmatched. Don't display it, but let it
3752 * remain for reference if marked as such.
3754 s0->current_picture_ptr = NULL;
3755 s0->first_field = FIELD_PICTURE;
3758 if (h->nal_ref_idc &&
3759 s0->current_picture_ptr->reference &&
3760 s0->current_picture_ptr->frame_num != h->frame_num) {
3762 * This and previous field were reference, but had
3763 * different frame_nums. Consider this field first in
3764 * pair. Throw away previous field except for reference
3767 s0->first_field = 1;
3768 s0->current_picture_ptr = NULL;
3771 /* Second field in complementary pair */
3772 s0->first_field = 0;
3777 /* Frame or first field in a potentially complementary pair */
3778 assert(!s0->current_picture_ptr);
3779 s0->first_field = FIELD_PICTURE;
3782 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3783 s0->first_field = 0;
3790 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3792 assert(s->mb_num == s->mb_width * s->mb_height);
3793 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3794 first_mb_in_slice >= s->mb_num){
3795 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3798 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3799 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3800 if (s->picture_structure == PICT_BOTTOM_FIELD)
3801 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3802 assert(s->mb_y < s->mb_height);
// pic num space doubles for field pictures (top/bottom each get a number)
3804 if(s->picture_structure==PICT_FRAME){
3805 h->curr_pic_num= h->frame_num;
3806 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3808 h->curr_pic_num= 2*h->frame_num + 1;
3809 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3812 if(h->nal_unit_type == NAL_IDR_SLICE){
3813 get_ue_golomb(&s->gb); /* idr_pic_id */
3816 if(h->sps.poc_type==0){
3817 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3819 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3820 h->delta_poc_bottom= get_se_golomb(&s->gb);
3824 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3825 h->delta_poc[0]= get_se_golomb(&s->gb);
3827 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3828 h->delta_poc[1]= get_se_golomb(&s->gb);
3833 if(h->pps.redundant_pic_cnt_present){
3834 h->redundant_pic_count= get_ue_golomb(&s->gb);
3837 //set defaults, might be overridden a few lines later
3838 h->ref_count[0]= h->pps.ref_count[0];
3839 h->ref_count[1]= h->pps.ref_count[1];
3841 if(h->slice_type_nos != FF_I_TYPE){
3842 if(h->slice_type_nos == FF_B_TYPE){
3843 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3845 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3847 if(num_ref_idx_active_override_flag){
3848 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3849 if(h->slice_type_nos==FF_B_TYPE)
3850 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3852 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3853 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3854 h->ref_count[0]= h->ref_count[1]= 1;
3858 if(h->slice_type_nos == FF_B_TYPE)
3865 if(!default_ref_list_done){
3866 fill_default_ref_list(h);
3869 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// keep the generic MPEG last/next picture pointers in sync with list 0/1
3872 if(h->slice_type_nos!=FF_I_TYPE){
3873 s->last_picture_ptr= &h->ref_list[0][0];
3874 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3876 if(h->slice_type_nos==FF_B_TYPE){
3877 s->next_picture_ptr= &h->ref_list[1][0];
3878 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3881 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3882 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3883 pred_weight_table(h);
3884 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3885 implicit_weight_table(h);
3890 decode_ref_pic_marking(h0, &s->gb);
3893 fill_mbaff_ref_list(h);
3895 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3896 direct_dist_scale_factor(h);
3897 direct_ref_list_init(h);
3899 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3900 tmp = get_ue_golomb(&s->gb);
3902 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3905 h->cabac_init_idc= tmp;
3908 h->last_qscale_diff = 0;
3909 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3911 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3915 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3916 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3917 //FIXME qscale / qp ... stuff
3918 if(h->slice_type == FF_SP_TYPE){
3919 get_bits1(&s->gb); /* sp_for_switch_flag */
3921 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3922 get_se_golomb(&s->gb); /* slice_qs_delta */
3925 h->deblocking_filter = 1;
3926 h->slice_alpha_c0_offset = 0;
3927 h->slice_beta_offset = 0;
3928 if( h->pps.deblocking_filter_parameters_present ) {
3929 tmp= get_ue_golomb(&s->gb);
3931 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3934 h->deblocking_filter= tmp;
// bitstream semantics: 0=on, 1=off; swap to the internal 1=on convention
3935 if(h->deblocking_filter < 2)
3936 h->deblocking_filter^= 1; // 1<->0
3938 if( h->deblocking_filter ) {
3939 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3940 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3944 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3945 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3946 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3947 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3948 h->deblocking_filter= 0;
// cross-slice deblocking (type 1) cannot be parallelized across contexts
3950 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3951 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3952 /* Cheat slightly for speed:
3953 Do not bother to deblock across slices. */
3954 h->deblocking_filter = 2;
3956 h0->max_contexts = 1;
3957 if(!h0->single_decode_warning) {
3958 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3959 h0->single_decode_warning = 1;
3962 return 1; // deblocking switched inside frame
3967 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3968 slice_group_change_cycle= get_bits(&s->gb, ?);
3971 h0->last_slice_type = slice_type;
3972 h->slice_num = ++h0->current_slice;
3973 if(h->slice_num >= MAX_SLICES){
3974 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// precompute ref index -> frame number mapping used by the deblocker
3978 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3982 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3983 +(h->ref_list[j][i].reference&3);
3986 for(i=16; i<48; i++)
3987 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3988 +(h->ref_list[j][i].reference&3);
3991 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3992 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3994 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3995 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3997 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3999 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4000 pps_id, h->frame_num,
4001 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4002 h->ref_count[0], h->ref_count[1],
4004 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4006 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4007 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one-bit in the cached bitstream window. */
4017 static inline int get_level_prefix(GetBitContext *gb){
4021 OPEN_READER(re, gb);
4022 UPDATE_CACHE(re, gb);
4023 buf=GET_CACHE(re, gb);
// position of the first set bit gives the unary code length
4025 log= 32 - av_log2(buf);
4027 print_bin(buf>>(32-log), log);
4028 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4031 LAST_SKIP_BITS(re, gb, log);
4032 CLOSE_READER(re, gb);
/* Checks whether the 8x8 transform may be used for the current macroblock:
 * every sub-partition must be 8x8 (and direct 8x8 inference must hold for
 * direct sub-blocks). */
4037 static inline int get_dct8x8_allowed(H264Context *h){
4040 if(!IS_SUB_8X8(h->sub_mb_type[i])
4041 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4048 * decodes a residual block.
4049 * @param n block index
4050 * @param scantable scantable
4051 * @param max_coeff number of coefficients in the block
4052 * @return <0 if an error occurred
// CAVLC residual decoding (H.264 spec 9.2): coeff_token, then trailing-one
// signs, remaining level values, total_zeros and per-coefficient run_before.
// qmul==NULL selects the raw (non-dequantizing) output path.
4054 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4055 MpegEncContext * const s = &h->s;
4056 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4058 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4060 //FIXME put trailing_ones into the context
// Chroma DC uses its own coeff_token VLC; luma DC/AC pick one of four tables
// indexed by the predicted non-zero count of the neighboring blocks.
4062 if(n == CHROMA_DC_BLOCK_INDEX){
4063 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4064 total_coeff= coeff_token>>2;
4066 if(n == LUMA_DC_BLOCK_INDEX){
4067 total_coeff= pred_non_zero_count(h, 0);
4068 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4069 total_coeff= coeff_token>>2;
4071 total_coeff= pred_non_zero_count(h, n);
4072 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4073 total_coeff= coeff_token>>2;
4074 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4078 //FIXME set last_non_zero?
// A corrupted bitstream can signal more coefficients than the block holds.
4082 if(total_coeff > (unsigned)max_coeff) {
4083 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4087 trailing_ones= coeff_token&3;
4088 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4089 assert(total_coeff<=16);
// Trailing ones: up to 3 sign bits, peeked at once then skipped as needed.
// Each decoded value is +1 or -1 depending on the corresponding sign bit.
4091 i = show_bits(gb, 3);
4092 skip_bits(gb, trailing_ones);
4093 level[0] = 1-((i&4)>>1);
4094 level[1] = 1-((i&2) );
4095 level[2] = 1-((i&1)<<1);
4097 if(trailing_ones<total_coeff) {
4098 int level_code, mask;
// suffix_length starts at 1 only for blocks with many coefficients.
4099 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4100 int prefix= get_level_prefix(gb);
4102 //first coefficient has suffix_length equal to 0 or 1
4103 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4105 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4107 level_code= (prefix<<suffix_length); //part
4108 }else if(prefix==14){
4110 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4112 level_code= prefix + get_bits(gb, 4); //part
// prefix>=15: escape coding with a (prefix-3)-bit suffix.
4114 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4115 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4117 level_code += (1<<(prefix-3))-4096;
// When fewer than 3 trailing ones exist, level magnitudes start at 2.
4120 if(trailing_ones < 3) level_code += 2;
// Map the unsigned level_code to a signed level (even->positive, odd->negative).
4125 mask= -(level_code&1);
4126 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4128 //remaining coefficients have suffix_length > 0
4129 for(i=trailing_ones+1;i<total_coeff;i++) {
4130 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4131 prefix = get_level_prefix(gb);
4133 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4135 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4137 level_code += (1<<(prefix-3))-4096;
4139 mask= -(level_code&1);
4140 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// Grow suffix_length once the decoded magnitude exceeds the table threshold.
4141 if(level_code > suffix_limit[suffix_length])
// If the block is full there can be no zeros between coefficients.
4146 if(total_coeff == max_coeff)
4149 if(n == CHROMA_DC_BLOCK_INDEX)
4150 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4152 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place coefficients from the highest scan position downwards, consuming a
// run_before VLC between each pair. Two copies of the loop: raw and dequant.
4155 coeff_num = zeros_left + total_coeff - 1;
4156 j = scantable[coeff_num];
4158 block[j] = level[0];
4159 for(i=1;i<total_coeff;i++) {
4162 else if(zeros_left < 7){
4163 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4165 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4167 zeros_left -= run_before;
4168 coeff_num -= 1 + run_before;
4169 j= scantable[ coeff_num ];
// Dequantizing variant: scale by qmul with rounding (+32 >> 6).
4174 block[j] = (level[0] * qmul[j] + 32)>>6;
4175 for(i=1;i<total_coeff;i++) {
4178 else if(zeros_left < 7){
4179 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4181 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4183 zeros_left -= run_before;
4184 coeff_num -= 1 + run_before;
4185 j= scantable[ coeff_num ];
4187 block[j]= (level[i] * qmul[j] + 32)>>6;
// Negative zeros_left means the runs overran the block: corrupt stream.
4192 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// Predicts mb_field_decoding_flag for an MBAFF macroblock pair whose flag is
// not coded (both MBs skipped): inherit the interlaced state of the left
// neighbor if it is in the same slice, else of the top neighbor, else 0.
4199 static void predict_field_decoding_flag(H264Context *h){
4200 MpegEncContext * const s = &h->s;
4201 const int mb_xy= h->mb_xy;
4202 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4203 ? s->current_picture.mb_type[mb_xy-1]
4204 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4205 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4207 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4211 * decodes a P_SKIP or B_SKIP macroblock
// Skipped MBs carry no residual: clear the non-zero counts, derive motion
// (temporal/spatial direct for B, pskip prediction for P) and write back.
4213 static void decode_mb_skip(H264Context *h){
4214 MpegEncContext * const s = &h->s;
4215 const int mb_xy= h->mb_xy;
// No coded coefficients in a skipped MB.
4218 memset(h->non_zero_count[mb_xy], 0, 16);
4219 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4222 mb_type|= MB_TYPE_INTERLACED;
4224 if( h->slice_type_nos == FF_B_TYPE )
4226 // just for fill_caches. pred_direct_motion will set the real mb_type
4227 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4229 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4230 pred_direct_motion(h, &mb_type);
4231 mb_type|= MB_TYPE_SKIP;
// P_SKIP: single 16x16 list-0 partition with predicted motion and ref 0.
4236 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4238 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4239 pred_pskip_motion(h, &mx, &my);
4240 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4241 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4244 write_back_motion(h, mb_type);
4245 s->current_picture.mb_type[mb_xy]= mb_type;
// Deblocking uses the current slice qscale for skipped MBs.
4246 s->current_picture.qscale_table[mb_xy]= s->qscale;
4247 h->slice_table[ mb_xy ]= h->slice_num;
4248 h->prev_mb_skipped= 1;
4252 * decodes a macroblock
4253 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Full CAVLC macroblock decode: skip-run handling, mb_type, intra prediction
// modes or inter motion/refs, CBP, mb_qp_delta, and residual blocks.
4255 static int decode_mb_cavlc(H264Context *h){
4256 MpegEncContext * const s = &h->s;
4258 int partition_count;
4259 unsigned int mb_type, cbp;
4260 int dct8x8_allowed= h->pps.transform_8x8_mode;
4262 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4264 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4266 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4267 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip-run handling (P/B slices only) ---
4269 if(h->slice_type_nos != FF_I_TYPE){
4270 if(s->mb_skip_run==-1)
4271 s->mb_skip_run= get_ue_golomb(&s->gb);
4273 if (s->mb_skip_run--) {
// MBAFF: field flag is coded only for the top MB of a pair; for a fully
// skipped pair it must be predicted from the neighbors instead.
4274 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4275 if(s->mb_skip_run==0)
4276 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4278 predict_field_decoding_flag(h);
4285 if( (s->mb_y&1) == 0 )
4286 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4289 h->prev_mb_skipped= 0;
// --- mb_type: slice-type specific tables; large values fall through to intra ---
4291 mb_type= get_ue_golomb(&s->gb);
4292 if(h->slice_type_nos == FF_B_TYPE){
4294 partition_count= b_mb_type_info[mb_type].partition_count;
4295 mb_type= b_mb_type_info[mb_type].type;
4298 goto decode_intra_mb;
4300 }else if(h->slice_type_nos == FF_P_TYPE){
4302 partition_count= p_mb_type_info[mb_type].partition_count;
4303 mb_type= p_mb_type_info[mb_type].type;
4306 goto decode_intra_mb;
4309 assert(h->slice_type_nos == FF_I_TYPE);
4310 if(h->slice_type == FF_SI_TYPE && mb_type)
4314 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// For intra MBs the table also supplies the (partial) CBP and 16x16 pred mode.
4318 cbp= i_mb_type_info[mb_type].cbp;
4319 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4320 mb_type= i_mb_type_info[mb_type].type;
4324 mb_type |= MB_TYPE_INTERLACED;
4326 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, no prediction or residual ---
4328 if(IS_INTRA_PCM(mb_type)){
4331 // We assume these blocks are very rare so we do not optimize it.
4332 align_get_bits(&s->gb);
4334 // The pixels are stored in the same order as levels in h->mb array.
4335 for(x=0; x < (CHROMA ? 384 : 256); x++){
4336 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4339 // In deblocking, the quantizer is 0
4340 s->current_picture.qscale_table[mb_xy]= 0;
4341 // All coeffs are present
4342 memset(h->non_zero_count[mb_xy], 16, 16);
4344 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs see twice the references (top+bottom fields).
4349 h->ref_count[0] <<= 1;
4350 h->ref_count[1] <<= 1;
4353 fill_caches(h, mb_type, 0);
// --- intra prediction mode decoding ---
4356 if(IS_INTRA(mb_type)){
4358 // init_top_left_availability(h);
4359 if(IS_INTRA4x4(mb_type)){
4362 if(dct8x8_allowed && get_bits1(&s->gb)){
4363 mb_type |= MB_TYPE_8x8DCT;
4367 // fill_intra4x4_pred_table(h);
// One pred mode per 4x4 block (or per 8x8 when 8x8 DCT; di presumably 4 then).
4368 for(i=0; i<16; i+=di){
4369 int mode= pred_intra_mode(h, i);
4371 if(!get_bits1(&s->gb)){
4372 const int rem_mode= get_bits(&s->gb, 3);
// rem_mode skips over the predicted mode.
4373 mode = rem_mode + (rem_mode >= mode);
4377 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4379 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4381 write_back_intra_pred_mode(h);
4382 if( check_intra4x4_pred_mode(h) < 0)
4385 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4386 if(h->intra16x16_pred_mode < 0)
4390 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4393 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: sub_mb_types, refs, then per-sub-partition motion ---
4395 }else if(partition_count==4){
4396 int i, j, sub_partition_count[4], list, ref[2][4];
4398 if(h->slice_type_nos == FF_B_TYPE){
4400 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4401 if(h->sub_mb_type[i] >=13){
4402 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4405 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4406 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4408 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4409 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4410 pred_direct_motion(h, &mb_type);
// Mark direct-predicted sub-blocks so they are not overwritten below.
4411 h->ref_cache[0][scan8[4]] =
4412 h->ref_cache[1][scan8[4]] =
4413 h->ref_cache[0][scan8[12]] =
4414 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4417 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4419 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4420 if(h->sub_mb_type[i] >=4){
4421 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4424 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4425 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 partition and list.
4429 for(list=0; list<h->list_count; list++){
4430 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4432 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4433 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4434 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4436 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4448 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition.
4450 for(list=0; list<h->list_count; list++){
4452 if(IS_DIRECT(h->sub_mb_type[i])) {
4453 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4456 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4457 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4459 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4460 const int sub_mb_type= h->sub_mb_type[i];
4461 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4462 for(j=0; j<sub_partition_count[i]; j++){
4464 const int index= 4*i + block_width*j;
4465 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4466 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4467 mx += get_se_golomb(&s->gb);
4468 my += get_se_golomb(&s->gb);
4469 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV over the 4x4 cache cells the sub-partition covers.
4471 if(IS_SUB_8X8(sub_mb_type)){
4473 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4475 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4476 }else if(IS_SUB_8X4(sub_mb_type)){
4477 mv_cache[ 1 ][0]= mx;
4478 mv_cache[ 1 ][1]= my;
4479 }else if(IS_SUB_4X8(sub_mb_type)){
4480 mv_cache[ 8 ][0]= mx;
4481 mv_cache[ 8 ][1]= my;
4483 mv_cache[ 0 ][0]= mx;
4484 mv_cache[ 0 ][1]= my;
4487 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4493 }else if(IS_DIRECT(mb_type)){
4494 pred_direct_motion(h, &mb_type);
4495 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter partitions ---
4497 int list, mx, my, i;
4498 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4499 if(IS_16X16(mb_type)){
4500 for(list=0; list<h->list_count; list++){
4502 if(IS_DIR(mb_type, 0, list)){
4503 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4504 if(val >= h->ref_count[list]){
4505 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4509 val= LIST_NOT_USED&0xFF;
4510 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4512 for(list=0; list<h->list_count; list++){
4514 if(IS_DIR(mb_type, 0, list)){
4515 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4516 mx += get_se_golomb(&s->gb);
4517 my += get_se_golomb(&s->gb);
4518 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4520 val= pack16to32(mx,my);
4523 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4526 else if(IS_16X8(mb_type)){
4527 for(list=0; list<h->list_count; list++){
4530 if(IS_DIR(mb_type, i, list)){
4531 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4532 if(val >= h->ref_count[list]){
4533 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4537 val= LIST_NOT_USED&0xFF;
4538 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4541 for(list=0; list<h->list_count; list++){
4544 if(IS_DIR(mb_type, i, list)){
4545 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4546 mx += get_se_golomb(&s->gb);
4547 my += get_se_golomb(&s->gb);
4548 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4550 val= pack16to32(mx,my);
4553 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4557 assert(IS_8X16(mb_type));
4558 for(list=0; list<h->list_count; list++){
4561 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4562 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4563 if(val >= h->ref_count[list]){
4564 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4568 val= LIST_NOT_USED&0xFF;
4569 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4572 for(list=0; list<h->list_count; list++){
4575 if(IS_DIR(mb_type, i, list)){
4576 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4577 mx += get_se_golomb(&s->gb);
4578 my += get_se_golomb(&s->gb);
4579 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4581 val= pack16to32(mx,my);
4584 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4590 if(IS_INTER(mb_type))
4591 write_back_motion(h, mb_type);
// --- coded_block_pattern (not coded for I16x16; folded into mb_type there) ---
4593 if(!IS_INTRA16x16(mb_type)){
4594 cbp= get_ue_golomb(&s->gb);
4596 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4601 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4602 else cbp= golomb_to_inter_cbp [cbp];
// Gray (chroma-less) variants map through separate tables.
4604 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4605 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag for inter MBs with luma residual.
4610 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4611 if(get_bits1(&s->gb)){
4612 mb_type |= MB_TYPE_8x8DCT;
4613 h->cbp_table[mb_xy]= cbp;
4616 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual decoding ---
4618 if(cbp || IS_INTRA16x16(mb_type)){
4619 int i8x8, i4x4, chroma_idx;
4621 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4622 const uint8_t *scan, *scan8x8, *dc_scan;
4624 // fill_non_zero_count_cache(h);
// Scan tables depend on field/frame coding; the qscale==0 variants differ
// (presumably to fold dequant into the scan — confirm in full source).
4626 if(IS_INTERLACED(mb_type)){
4627 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4628 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4629 dc_scan= luma_dc_field_scan;
4631 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4632 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4633 dc_scan= luma_dc_zigzag_scan;
4636 dquant= get_se_golomb(&s->gb);
4638 if( dquant > 25 || dquant < -26 ){
4639 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics.
4643 s->qscale += dquant;
4644 if(((unsigned)s->qscale) > 51){
4645 if(s->qscale<0) s->qscale+= 52;
4646 else s->qscale-= 52;
4649 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4650 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// I16x16: separate DC block then 15-coefficient AC blocks.
4651 if(IS_INTRA16x16(mb_type)){
4652 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4653 return -1; //FIXME continue if partitioned and other return -1 too
4656 assert((cbp&15) == 0 || (cbp&15) == 15);
4659 for(i8x8=0; i8x8<4; i8x8++){
4660 for(i4x4=0; i4x4<4; i4x4++){
4661 const int index= i4x4 + 4*i8x8;
4662 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4668 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-I16x16: 4x4 or 8x8 luma blocks per CBP bit.
4671 for(i8x8=0; i8x8<4; i8x8++){
4672 if(cbp & (1<<i8x8)){
4673 if(IS_8x8DCT(mb_type)){
4674 DCTELEM *buf = &h->mb[64*i8x8];
4676 for(i4x4=0; i4x4<4; i4x4++){
4677 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4678 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4681 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4682 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4684 for(i4x4=0; i4x4<4; i4x4++){
4685 const int index= i4x4 + 4*i8x8;
4687 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4693 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4694 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks (4 coeffs, no dequant here) then AC blocks per plane.
4700 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4701 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4707 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4708 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4709 for(i4x4=0; i4x4<4; i4x4++){
4710 const int index= 16 + 4*chroma_idx + i4x4;
4711 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4717 uint8_t * const nnz= &h->non_zero_count_cache[0];
4718 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4722 uint8_t * const nnz= &h->non_zero_count_cache[0];
4723 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4724 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4725 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4727 s->current_picture.qscale_table[mb_xy]= s->qscale;
4728 write_back_non_zero_count(h);
// Undo the MBAFF field ref_count doubling from above.
4731 h->ref_count[0] >>= 1;
4732 h->ref_count[1] >>= 1;
// CABAC decode of mb_field_decoding_flag: context 70..72 selected by how many
// of the left/above MB pairs (same slice) are already coded as interlaced.
4738 static int decode_cabac_field_decoding_flag(H264Context *h) {
4739 MpegEncContext * const s = &h->s;
4740 const int mb_x = s->mb_x;
// Address of the top MB of the current pair; neighbors are pair-based.
4741 const int mb_y = s->mb_y & ~1;
4742 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4743 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4745 unsigned int ctx = 0;
4747 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4750 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4754 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// Decodes an intra mb_type with CABAC: 0 = I4x4, 1..24 = I16x16 variants
// (encoding cbp_luma/cbp_chroma/pred mode), 25 = I_PCM. ctx_base selects the
// state offset; intra_slice enables the neighbor-based context for bin 0.
4757 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4758 uint8_t *state= &h->cabac_state[ctx_base];
4762 MpegEncContext * const s = &h->s;
4763 const int mba_xy = h->left_mb_xy[0];
4764 const int mbb_xy = h->top_mb_xy;
// Context increments for each same-slice neighbor that is not I4x4.
4766 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4768 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4770 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4771 return 0; /* I4x4 */
4774 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4775 return 0; /* I4x4 */
// Terminate bin distinguishes I_PCM from I16x16.
4778 if( get_cabac_terminate( &h->cabac ) )
4779 return 25; /* PCM */
4781 mb_type = 1; /* I16x16 */
4782 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4783 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4784 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4785 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4786 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// CABAC mb_type decoding, dispatched per slice type. I slices delegate to
// decode_cabac_intra_mb_type; P slices decode a small tree; B slices decode a
// 4-bit (plus optional 5th) code into the B mb_type enumeration.
4790 static int decode_cabac_mb_type( H264Context *h ) {
4791 MpegEncContext * const s = &h->s;
4793 if( h->slice_type_nos == FF_I_TYPE ) {
4794 return decode_cabac_intra_mb_type(h, 3, 1);
4795 } else if( h->slice_type_nos == FF_P_TYPE ) {
4796 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4798 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4799 /* P_L0_D16x16, P_8x8 */
4800 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4802 /* P_L0_D8x16, P_L0_D16x8 */
4803 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// First bin == 1 in a P slice means an intra mb_type (offset by 5).
4806 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4809 const int mba_xy = h->left_mb_xy[0];
4810 const int mbb_xy = h->top_mb_xy;
4813 assert(h->slice_type_nos == FF_B_TYPE);
// Context for the first B bin: count of same-slice non-direct neighbors.
4815 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4817 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4820 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4821 return 0; /* B_Direct_16x16 */
4823 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4824 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4827 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4828 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4829 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4830 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4832 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4833 else if( bits == 13 ) {
// 13 escapes to an intra mb_type (offset by 23 in B slices).
4834 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4835 } else if( bits == 14 )
4836 return 11; /* B_L1_L0_8x16 */
4837 else if( bits == 15 )
4838 return 22; /* B_8x8 */
// bits in [8..12]: one extra bin refines the bi-directional 16x8/8x16 types.
4840 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4841 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
// CABAC decode of mb_skip_flag. Context 11..13 (P) or 24+ (B, via the +13
// offset below) chosen by how many same-slice neighbors are NOT skipped.
// MBAFF needs pair-aware neighbor addresses matched to the field/frame state.
4845 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4846 MpegEncContext * const s = &h->s;
4850 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4851 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Left neighbor: step down to the bottom MB of the pair when its
// field/frame coding matches the current MB's.
4854 && h->slice_table[mba_xy] == h->slice_num
4855 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4856 mba_xy += s->mb_stride;
4858 mbb_xy = mb_xy - s->mb_stride;
4860 && h->slice_table[mbb_xy] == h->slice_num
4861 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4862 mbb_xy -= s->mb_stride;
4864 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4866 int mb_xy = h->mb_xy;
// Non-MBAFF: top neighbor is one row up (two rows in field pictures).
4868 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4871 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4873 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4876 if( h->slice_type_nos == FF_B_TYPE )
4878 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// CABAC intra 4x4 prediction mode: one bin (state 68) for "use predicted
// mode", else a 3-bit rem mode (state 69) skipping over the predicted one.
4881 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4884 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4887 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4888 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4889 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// rem mode values at/above the predicted mode are shifted up by one.
4891 if( mode >= pred_mode )
// CABAC chroma prediction mode: truncated unary of up to 3 bins (states
// 64..67), first bin contexted on whether neighbors use a non-zero mode.
4897 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4898 const int mba_xy = h->left_mb_xy[0];
4899 const int mbb_xy = h->top_mb_xy;
4903 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4904 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4907 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4910 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4913 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4915 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// CABAC luma CBP: four bins (states 73..76), each contexted on whether the
// spatially adjacent 8x8 block — from the left/top MB or from bits already
// decoded in this MB — has coded coefficients. -1 (no neighbor) sets the bit.
4921 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4922 int cbp_b, cbp_a, ctx, cbp = 0;
4924 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4925 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4927 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4928 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4929 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4930 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4931 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4932 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4933 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4934 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// CABAC chroma CBP: returns 0 (none), 1 (DC only) or 2 (DC+AC). Two bins
// (states 77..84), each contexted on the neighbors' chroma CBP values.
4937 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4941 cbp_a = (h->left_cbp>>4)&0x03;
4942 cbp_b = (h-> top_cbp>>4)&0x03;
4945 if( cbp_a > 0 ) ctx++;
4946 if( cbp_b > 0 ) ctx += 2;
4947 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin (ctx offset 4): DC-only vs DC+AC, contexted on neighbors == 2.
4951 if( cbp_a == 2 ) ctx++;
4952 if( cbp_b == 2 ) ctx += 2;
4953 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC mb_qp_delta: unary-coded magnitude (states 60..63; first-bin context
// depends on the previous MB's dqp), then mapped to a signed delta.
// NOTE(review): several lines are elided here — the even-value mapping and
// ctx updates are not visible; confirm against the full source.
4955 static int decode_cabac_mb_dqp( H264Context *h) {
4959 if( h->last_qscale_diff != 0 )
4962 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4968 if(val > 102) //prevent infinite loop
// Odd unary values map to negative deltas.
4975 return -(val + 1)/2;
// CABAC P-slice sub_mb_type: small binary tree over states 21..23
// (P_L0_8x8 / 8x4 / 4x8 / 4x4). Some branches are elided in this view.
4977 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4978 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4980 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4982 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// CABAC B-slice sub_mb_type: tree over states 36..39 mapping to
// B_Direct_8x8 (0), B_L0/L1_8x8 (1,2), and the remaining split/bi types.
4986 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4988 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4989 return 0; /* B_Direct_8x8 */
4990 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4991 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4993 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4994 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4995 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining two bins refine the 8x4/4x8 L0/L1/Bi variants.
4998 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4999 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// CABAC transform_size_8x8_flag: one bin, context from how many neighbors
// already use the 8x8 transform (precomputed in neighbor_transform_size).
5003 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5004 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// CABAC reference index: unary code over states 54+, first-bin context from
// whether the left/top cached refs are non-zero (direct neighbors in B slices
// count as zero). Capped at 32 to guard against corrupt streams.
5007 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5008 int refa = h->ref_cache[list][scan8[n] - 1];
5009 int refb = h->ref_cache[list][scan8[n] - 8];
5013 if( h->slice_type_nos == FF_B_TYPE) {
5014 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5016 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5025 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5031 if(ref >= 32 /*h->ref_list[list]*/){
// CABAC motion vector difference component l (0=x ctx base 40, 1=y base 47).
// Context from the summed neighbor MVD magnitude; value coded unary up to 9,
// then 3rd-order exp-Golomb in bypass mode, then a bypass sign bit.
5038 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5039 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5040 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5041 int ctxbase = (l == 0) ? 40 : 47;
5043 int ctx = (amvd>2) + (amvd>32);
5045 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5050 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb suffix in bypass mode for magnitudes >= 9.
5058 while( get_cabac_bypass( &h->cabac ) ) {
5062 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5067 if( get_cabac_bypass( &h->cabac ) )
5071 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the coded_block_flag context for category 'cat' and block 'idx':
// nza/nzb are the left/top neighbors' "has coefficients" status, taken from
// the cbp_table bits for DC categories or the non_zero_count cache for AC.
// Returns ctx (0..3, from nza + 2*nzb per the elided lines) + 4*cat.
5074 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma DC (cat 0): bit 0x100 of the neighbor CBP tracks the DC block.
5080 nza = h->left_cbp&0x100;
5081 nzb = h-> top_cbp&0x100;
// Chroma DC (cat 3): per-plane bits 0x40<<idx.
5083 nza = (h->left_cbp>>(6+idx))&0x01;
5084 nzb = (h-> top_cbp>>(6+idx))&0x01;
5087 assert(cat == 1 || cat == 2 || cat == 4);
5088 nza = h->non_zero_count_cache[scan8[idx] - 1];
5089 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5098 return ctx + 4 * cat;
// Maps each of the 63 possible 8x8 scan positions to the context offset used
// for the last_significant_coeff_flag (H.264 spec 9.3.3.1.3, ctxIdxInc for
// 8x8 blocks). Byte-aligned (alignment 1) so asm code can index it directly.
5101 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5103 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5104 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5105 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
// CABAC residual block decoding (H.264 spec 9.3.3.1.1.9 / residual_block_cabac):
// coded_block_flag, significance map, then coefficient levels and signs.
// is_dc is a compile-time constant so the DC/AC specializations fold away.
5108 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
// Context-base tables indexed [MB_FIELD][cat]: field coding uses a separate
// set of significance contexts (the 277/338 banks, 402+ for 8x8).
5109 static const int significant_coeff_flag_offset[2][6] = {
5110 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5111 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5113 static const int last_coeff_flag_offset[2][6] = {
5114 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5115 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5117 static const int coeff_abs_level_m1_offset[6] = {
5118 227+0, 227+10, 227+20, 227+30, 227+39, 426
// Per-position significance context offsets for 8x8 blocks, frame/field.
5120 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5121 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5122 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5123 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5124 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5125 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5126 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5127 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5128 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5130 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5131 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5132 * map node ctx => cabac ctx for level=1 */
5133 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5134 /* map node ctx => cabac ctx for level>1 */
5135 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5136 static const uint8_t coeff_abs_level_transition[2][8] = {
5137 /* update node ctx after decoding a level=1 */
5138 { 1, 2, 3, 3, 4, 5, 6, 7 },
5139 /* update node ctx after decoding a level>1 */
5140 { 4, 4, 4, 4, 5, 6, 7, 7 }
5146 int coeff_count = 0;
5149 uint8_t *significant_coeff_ctx_base;
5150 uint8_t *last_coeff_ctx_base;
5151 uint8_t *abs_level_m1_ctx_base;
// Work on a local CABACContext copy so the hot loop keeps it in registers;
// copied back to h->cabac before every return.
5154 #define CABAC_ON_STACK
5156 #ifdef CABAC_ON_STACK
5159 cc.range = h->cabac.range;
5160 cc.low = h->cabac.low;
5161 cc.bytestream= h->cabac.bytestream;
5163 #define CC &h->cabac
5167 /* cat: 0-> DC 16x16 n = 0
5168 * 1-> AC 16x16 n = luma4x4idx
5169 * 2-> Luma4x4 n = luma4x4idx
5170 * 3-> DC Chroma n = iCbCr
5171 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5172 * 5-> Luma8x8 n = 4 * luma8x8idx
5175 /* read coded block flag */
// 8x8 luma (cat 5) has no coded_block_flag; it is implied by the CBP.
5176 if( is_dc || cat != 5 ) {
5177 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5179 h->non_zero_count_cache[scan8[n]] = 0;
5181 #ifdef CABAC_ON_STACK
5182 h->cabac.range = cc.range ;
5183 h->cabac.low = cc.low ;
5184 h->cabac.bytestream= cc.bytestream;
5190 significant_coeff_ctx_base = h->cabac_state
5191 + significant_coeff_flag_offset[MB_FIELD][cat];
5192 last_coeff_ctx_base = h->cabac_state
5193 + last_coeff_flag_offset[MB_FIELD][cat];
5194 abs_level_m1_ctx_base = h->cabac_state
5195 + coeff_abs_level_m1_offset[cat];
// Significance map: for each scan position, significant_coeff_flag then
// (if set) last_significant_coeff_flag; records positions into index[].
5197 if( !is_dc && cat == 5 ) {
5198 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5199 for(last= 0; last < coefs; last++) { \
5200 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5201 if( get_cabac( CC, sig_ctx )) { \
5202 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5203 index[coeff_count++] = last; \
5204 if( get_cabac( CC, last_ctx ) ) { \
5210 if( last == max_coeff -1 ) {\
5211 index[coeff_count++] = last;\
5213 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
// x86 has hand-written asm versions of the significance loop.
5214 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5215 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5217 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5219 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5221 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5224 assert(coeff_count > 0);
// Record "block has coefficients" for the CBF contexts of later blocks.
5228 h->cbp_table[h->mb_xy] |= 0x100;
5230 h->cbp_table[h->mb_xy] |= 0x40 << n;
5233 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5235 assert( cat == 1 || cat == 2 || cat == 4 );
5236 h->non_zero_count_cache[scan8[n]] = coeff_count;
// Level decoding, highest scan position first. node_ctx tracks the
// abslevel state machine that selects the level contexts.
5241 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5243 int j= scantable[index[--coeff_count]];
5245 if( get_cabac( CC, ctx ) == 0 ) {
5246 node_ctx = coeff_abs_level_transition[0][node_ctx];
// |level| == 1: just the bypass-coded sign (dequantized when qmul set).
5248 block[j] = get_cabac_bypass_sign( CC, -1);
5250 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5254 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5255 node_ctx = coeff_abs_level_transition[1][node_ctx];
// |level| > 1: unary up to 15, then 0th-order exp-Golomb in bypass.
5257 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5261 if( coeff_abs >= 15 ) {
5263 while( get_cabac_bypass( CC ) ) {
5269 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5275 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5277 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5280 } while( coeff_count );
5281 #ifdef CABAC_ON_STACK
5282 h->cabac.range = cc.range ;
5283 h->cabac.low = cc.low ;
5284 h->cabac.bytestream= cc.bytestream;
5289 #ifndef CONFIG_SMALL
/**
 * Decode CABAC residual coefficients for a DC block.
 * Non-CONFIG_SMALL wrapper: hard-codes the final is_dc argument to 1 so the
 * compiler can specialize decode_cabac_residual_internal and drop the AC paths.
 * Used for cat 0 (luma 16x16 DC) and cat 3 (chroma DC) — see the call sites
 * in decode_mb_cabac, which pass dc_scan/chroma_dc_scan and qmul==NULL.
 */
5290 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5291 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/**
 * Decode CABAC residual coefficients for a non-DC (AC or full) block.
 * Non-CONFIG_SMALL wrapper: hard-codes the final is_dc argument to 0 so the
 * compiler can specialize decode_cabac_residual_internal for the AC/luma paths
 * (cats 1, 2, 4, 5 at the call sites in decode_mb_cabac).
 */
5294 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5295 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/**
 * Decode one CABAC-coded residual block, dispatching on the block category.
 * cat 0 and 3 are the DC categories (luma 16x16 DC resp. chroma DC, per the
 * call sites in decode_mb_cabac); everything else takes the non-DC path.
 * NOTE(review): this listing elides the preprocessor lines between the two
 * strategies (line numbers jump 5301 -> 5303); in the original, the single
 * _internal call is the CONFIG_SMALL variant and the dc/nondc dispatch is the
 * #else branch — confirm against the full source.
 */
5299 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5301 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5303 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5304 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Compute the macroblock indices of the top and left neighbours of the
 * current MB (h->top_mb_xy, h->left_mb_xy[0]) from h->mb_xy.
 * The frame-coded defaults (top = mb_xy - mb_stride, left = mb_xy - 1) are
 * adjusted below for MBAFF macroblock pairs and field pictures.
 * NOTE(review): some original lines are elided in this listing
 * (non-contiguous numbering) — e.g. the guard that gates the MBAFF branch.
 */
5308 static inline void compute_mb_neighbors(H264Context *h)
5310 MpegEncContext * const s = &h->s;
5311 const int mb_xy = h->mb_xy;
// Frame-coded defaults.
5312 h->top_mb_xy = mb_xy - s->mb_stride;
5313 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: neighbours are computed on MB *pairs* (y rounded down to even).
5315 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5316 const int top_pair_xy = pair_xy - s->mb_stride;
5317 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5318 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5319 const int curr_mb_frame_flag = !MB_FIELD;
5320 const int bottom = (s->mb_y & 1);
// Top neighbour moves up one more row depending on field/frame coding of
// the current MB vs. the top pair (bottom vs. top MB of the current pair).
5322 ? !curr_mb_frame_flag // bottom macroblock
5323 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5325 h->top_mb_xy -= s->mb_stride;
// Left neighbour: when the field/frame coding of the left pair differs,
// use the top MB of the left pair.
5327 if (left_mb_frame_flag != curr_mb_frame_flag) {
5328 h->left_mb_xy[0] = pair_xy - 1;
// Field pictures: rows are one field apart, so step one extra MB row.
5330 } else if (FIELD_PICTURE) {
5331 h->top_mb_xy -= s->mb_stride;
/**
 * Decode one macroblock from the CABAC bitstream.
 * Reads skip flags, mb_type, prediction modes / motion vectors, CBP, dqp and
 * all residual blocks, and writes the results into the H264Context caches and
 * current_picture tables.
 * NOTE(review): this listing elides many original lines (non-contiguous
 * numbering), so several closing braces / else branches are not visible here.
 */
5337 * decodes a macroblock
5338 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5340 static int decode_mb_cabac(H264Context *h) {
5341 MpegEncContext * const s = &h->s;
5343 int mb_type, partition_count, cbp = 0;
5344 int dct8x8_allowed= h->pps.transform_8x8_mode;
5346 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5348 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5350 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- Skip handling (P/B slices only; I slices have no skip flag) ---
5351 if( h->slice_type_nos != FF_I_TYPE ) {
5353 /* a skipped mb needs the aff flag from the following mb */
5354 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5355 predict_field_decoding_flag(h);
5356 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5357 skip = h->next_mb_skipped;
5359 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5360 /* read skip flags */
// MBAFF top MB of a pair: peek the bottom MB's skip flag now, because the
// field decoding flag is only coded once per pair.
5362 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5363 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5364 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5365 if(h->next_mb_skipped)
5366 predict_field_decoding_flag(h);
5368 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// Skipped MB: reset per-MB tables and return via the skip path.
5373 h->cbp_table[mb_xy] = 0;
5374 h->chroma_pred_mode_table[mb_xy] = 0;
5375 h->last_qscale_diff = 0;
5382 if( (s->mb_y&1) == 0 )
5384 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5387 h->prev_mb_skipped = 0;
// --- mb_type decoding ---
5389 compute_mb_neighbors(h);
5390 mb_type = decode_cabac_mb_type( h );
5391 assert(mb_type >= 0);
5393 if( h->slice_type_nos == FF_B_TYPE ) {
5395 partition_count= b_mb_type_info[mb_type].partition_count;
5396 mb_type= b_mb_type_info[mb_type].type;
5399 goto decode_intra_mb;
5401 } else if( h->slice_type_nos == FF_P_TYPE ) {
5403 partition_count= p_mb_type_info[mb_type].partition_count;
5404 mb_type= p_mb_type_info[mb_type].type;
5407 goto decode_intra_mb;
5410 if(h->slice_type == FF_SI_TYPE && mb_type)
5412 assert(h->slice_type_nos == FF_I_TYPE);
// decode_intra_mb target: look up CBP/pred mode for intra MB types.
5414 partition_count = 0;
5415 cbp= i_mb_type_info[mb_type].cbp;
5416 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5417 mb_type= i_mb_type_info[mb_type].type;
5420 mb_type |= MB_TYPE_INTERLACED;
5422 h->slice_table[ mb_xy ]= h->slice_num;
// --- IPCM: raw pixels follow in the bitstream, bypassing CABAC ---
5424 if(IS_INTRA_PCM(mb_type)) {
5427 // We assume these blocks are very rare so we do not optimize it.
5428 // FIXME The two following lines get the bitstream position in the cabac
5429 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5430 ptr= h->cabac.bytestream;
5431 if(h->cabac.low&0x1) ptr--;
5433 if(h->cabac.low&0x1FF) ptr--;
5436 // The pixels are stored in the same order as levels in h->mb array.
5437 memcpy(h->mb, ptr, 256); ptr+=256;
5439 memcpy(h->mb+128, ptr, 128); ptr+=128;
// Restart the CABAC decoder after the raw pixel data.
5442 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5444 // All blocks are present
5445 h->cbp_table[mb_xy] = 0x1ef;
5446 h->chroma_pred_mode_table[mb_xy] = 0;
5447 // In deblocking, the quantizer is 0
5448 s->current_picture.qscale_table[mb_xy]= 0;
5449 // All coeffs are present
5450 memset(h->non_zero_count[mb_xy], 16, 16);
5451 s->current_picture.mb_type[mb_xy]= mb_type;
5452 h->last_qscale_diff = 0;
// MBAFF field MBs address references per field: double the ref counts
// while decoding (undone at the end of this function).
5457 h->ref_count[0] <<= 1;
5458 h->ref_count[1] <<= 1;
5461 fill_caches(h, mb_type, 0);
// --- Intra prediction mode decoding ---
5463 if( IS_INTRA( mb_type ) ) {
5465 if( IS_INTRA4x4( mb_type ) ) {
// 8x8 transform: one pred mode per 8x8 block, replicated over the cache.
5466 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5467 mb_type |= MB_TYPE_8x8DCT;
5468 for( i = 0; i < 16; i+=4 ) {
5469 int pred = pred_intra_mode( h, i );
5470 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5471 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5474 for( i = 0; i < 16; i++ ) {
5475 int pred = pred_intra_mode( h, i );
5476 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5478 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5481 write_back_intra_pred_mode(h);
5482 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5484 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5485 if( h->intra16x16_pred_mode < 0 ) return -1;
5488 h->chroma_pred_mode_table[mb_xy] =
5489 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5491 pred_mode= check_intra_pred_mode( h, pred_mode );
5492 if( pred_mode < 0 ) return -1;
5493 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions (sub-macroblock types) ---
5495 } else if( partition_count == 4 ) {
5496 int i, j, sub_partition_count[4], list, ref[2][4];
5498 if( h->slice_type_nos == FF_B_TYPE ) {
5499 for( i = 0; i < 4; i++ ) {
5500 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5501 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5502 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5504 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5505 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5506 pred_direct_motion(h, &mb_type);
5507 h->ref_cache[0][scan8[4]] =
5508 h->ref_cache[1][scan8[4]] =
5509 h->ref_cache[0][scan8[12]] =
5510 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5511 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5512 for( i = 0; i < 4; i++ )
5513 if( IS_DIRECT(h->sub_mb_type[i]) )
5514 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5518 for( i = 0; i < 4; i++ ) {
5519 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5520 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5521 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list / 8x8 block (with bitstream range check).
5525 for( list = 0; list < h->list_count; list++ ) {
5526 for( i = 0; i < 4; i++ ) {
5527 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5528 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5529 if( h->ref_count[list] > 1 ){
5530 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5531 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5532 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5540 h->ref_cache[list][ scan8[4*i]+1 ]=
5541 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5546 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition.
5548 for(list=0; list<h->list_count; list++){
5550 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5551 if(IS_DIRECT(h->sub_mb_type[i])){
5552 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5556 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5557 const int sub_mb_type= h->sub_mb_type[i];
5558 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5559 for(j=0; j<sub_partition_count[i]; j++){
5562 const int index= 4*i + block_width*j;
5563 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5564 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5565 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5567 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5568 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5569 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV/MVD into every 4x4 cache cell the sub-partition covers.
5571 if(IS_SUB_8X8(sub_mb_type)){
5573 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5575 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5578 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5580 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5581 }else if(IS_SUB_8X4(sub_mb_type)){
5582 mv_cache[ 1 ][0]= mx;
5583 mv_cache[ 1 ][1]= my;
5585 mvd_cache[ 1 ][0]= mx - mpx;
5586 mvd_cache[ 1 ][1]= my - mpy;
5587 }else if(IS_SUB_4X8(sub_mb_type)){
5588 mv_cache[ 8 ][0]= mx;
5589 mv_cache[ 8 ][1]= my;
5591 mvd_cache[ 8 ][0]= mx - mpx;
5592 mvd_cache[ 8 ][1]= my - mpy;
5594 mv_cache[ 0 ][0]= mx;
5595 mv_cache[ 0 ][1]= my;
5597 mvd_cache[ 0 ][0]= mx - mpx;
5598 mvd_cache[ 0 ][1]= my - mpy;
// List not used for this block: zero the 2x2 MV/MVD cache region.
5601 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5602 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5603 p[0] = p[1] = p[8] = p[9] = 0;
5604 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- B-direct 16x16 ---
5608 } else if( IS_DIRECT(mb_type) ) {
5609 pred_direct_motion(h, &mb_type);
5610 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5611 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5612 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter 16x16 / 16x8 / 8x16 partitions ---
5614 int list, mx, my, i, mpx, mpy;
5615 if(IS_16X16(mb_type)){
5616 for(list=0; list<h->list_count; list++){
5617 if(IS_DIR(mb_type, 0, list)){
5619 if(h->ref_count[list] > 1){
5620 ref= decode_cabac_mb_ref(h, list, 0);
5621 if(ref >= (unsigned)h->ref_count[list]){
5622 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5627 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5629 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5631 for(list=0; list<h->list_count; list++){
5632 if(IS_DIR(mb_type, 0, list)){
5633 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5635 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5636 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5637 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5639 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5640 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5642 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5645 else if(IS_16X8(mb_type)){
5646 for(list=0; list<h->list_count; list++){
5648 if(IS_DIR(mb_type, i, list)){
5650 if(h->ref_count[list] > 1){
5651 ref= decode_cabac_mb_ref( h, list, 8*i );
5652 if(ref >= (unsigned)h->ref_count[list]){
5653 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5658 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5660 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5663 for(list=0; list<h->list_count; list++){
5665 if(IS_DIR(mb_type, i, list)){
5666 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5667 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5668 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5669 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5671 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5672 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5674 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5675 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5680 assert(IS_8X16(mb_type));
5681 for(list=0; list<h->list_count; list++){
5683 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5685 if(h->ref_count[list] > 1){
5686 ref= decode_cabac_mb_ref( h, list, 4*i );
5687 if(ref >= (unsigned)h->ref_count[list]){
5688 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5693 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5695 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5698 for(list=0; list<h->list_count; list++){
5700 if(IS_DIR(mb_type, i, list)){
5701 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5702 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5703 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5705 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5706 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5707 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5709 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5710 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5717 if( IS_INTER( mb_type ) ) {
5718 h->chroma_pred_mode_table[mb_xy] = 0;
5719 write_back_motion( h, mb_type );
// --- Coded block pattern ---
5722 if( !IS_INTRA16x16( mb_type ) ) {
5723 cbp = decode_cabac_mb_cbp_luma( h );
5725 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5728 h->cbp_table[mb_xy] = h->cbp = cbp;
// transform_size_8x8_flag is coded after CBP for inter MBs.
5730 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5731 if( decode_cabac_mb_transform_size( h ) )
5732 mb_type |= MB_TYPE_8x8DCT;
5734 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residual decoding (only when something is coded) ---
5736 if( cbp || IS_INTRA16x16( mb_type ) ) {
5737 const uint8_t *scan, *scan8x8, *dc_scan;
5738 const uint32_t *qmul;
5741 if(IS_INTERLACED(mb_type)){
5742 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5743 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5744 dc_scan= luma_dc_field_scan;
5746 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5747 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5748 dc_scan= luma_dc_zigzag_scan;
// mb_qp_delta, with wrap-around of qscale into [0,51].
5751 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5752 if( dqp == INT_MIN ){
5753 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5757 if(((unsigned)s->qscale) > 51){
5758 if(s->qscale<0) s->qscale+= 52;
5759 else s->qscale-= 52;
5761 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5762 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
// Intra16x16: separate luma DC (cat 0) then 15-coeff AC blocks (cat 1).
5764 if( IS_INTRA16x16( mb_type ) ) {
5766 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5767 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5770 qmul = h->dequant4_coeff[0][s->qscale];
5771 for( i = 0; i < 16; i++ ) {
5772 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5773 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5776 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Otherwise: per-8x8 luma blocks, either one 8x8 (cat 5) or four 4x4 (cat 2).
5780 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5781 if( cbp & (1<<i8x8) ) {
5782 if( IS_8x8DCT(mb_type) ) {
5783 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5784 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5786 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5787 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5788 const int index = 4*i8x8 + i4x4;
5789 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5791 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5792 //STOP_TIMER("decode_residual")
5796 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5797 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (cat 3) then chroma AC (cat 4) when signalled in the CBP.
5804 for( c = 0; c < 2; c++ ) {
5805 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5806 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5812 for( c = 0; c < 2; c++ ) {
5813 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5814 for( i = 0; i < 4; i++ ) {
5815 const int index = 16 + 4 * c + i;
5816 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5817 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5821 uint8_t * const nnz= &h->non_zero_count_cache[0];
5822 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5823 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residuals at all: clear the whole nnz cache and reset dqp history.
5826 uint8_t * const nnz= &h->non_zero_count_cache[0];
5827 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5828 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5829 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5830 h->last_qscale_diff = 0;
5833 s->current_picture.qscale_table[mb_xy]= s->qscale;
5834 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling done above.
5837 h->ref_count[0] >>= 1;
5838 h->ref_count[1] >>= 1;
/**
 * Deblock one vertical luma edge (4 pels high per bS entry) via the dsp
 * h-loop-filter. The alpha/beta/tc0 tables are biased by +52 so that a
 * negative qp+offset index stays inside the array.
 * tc[i] = -1 marks "no filtering" for that 4-pel segment; the intra variant
 * (bS==4 path, lines elided in this listing) needs no tc at all.
 */
5845 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5847 const int index_a = qp + h->slice_alpha_c0_offset;
5848 const int alpha = (alpha_table+52)[index_a];
5849 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5854 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5855 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5857 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Deblock one vertical chroma edge via the dsp h-loop-filter.
 * Same table lookups as the luma variant, but chroma uses tc0+1 and 0 as the
 * "no filtering" marker (the chroma dsp filter convention).
 */
5860 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5862 const int index_a = qp + h->slice_alpha_c0_offset;
5863 const int alpha = (alpha_table+52)[index_a];
5864 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5869 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5870 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5872 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock the vertical left luma edge of an MBAFF macroblock, one pixel row
 * at a time (16 rows), since bS and qp can differ per row (8 bS entries,
 * 2 qp values). Implements the normal (bS<4) and strong (bS==4) filters in
 * scalar C rather than calling the dsp routines.
 * NOTE(review): several original lines are elided in this listing
 * (non-contiguous numbering), including the tc derivation used at line 5923.
 */
5876 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5878 for( i = 0; i < 16; i++, pix += stride) {
5884 int bS_index = (i >> 1);
5887 bS_index |= (i & 1);
5890 if( bS[bS_index] == 0 ) {
// qp selection: field MBs split top/bottom half, frame MBs alternate rows.
5894 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5895 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5896 alpha = (alpha_table+52)[index_a];
5897 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal filter, bS in 1..3: clip the delta to +-tc.
5899 if( bS[bS_index] < 4 ) {
5900 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5901 const int p0 = pix[-1];
5902 const int p1 = pix[-2];
5903 const int p2 = pix[-3];
5904 const int q0 = pix[0];
5905 const int q1 = pix[1];
5906 const int q2 = pix[2];
5908 if( FFABS( p0 - q0 ) < alpha &&
5909 FFABS( p1 - p0 ) < beta &&
5910 FFABS( q1 - q0 ) < beta ) {
// p1/q1 are only adjusted when the second neighbour is flat enough.
5914 if( FFABS( p2 - p0 ) < beta ) {
5915 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5918 if( FFABS( q2 - q0 ) < beta ) {
5919 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5923 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5924 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5925 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5926 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong filter, bS == 4 (intra edges).
5929 const int p0 = pix[-1];
5930 const int p1 = pix[-2];
5931 const int p2 = pix[-3];
5933 const int q0 = pix[0];
5934 const int q1 = pix[1];
5935 const int q2 = pix[2];
5937 if( FFABS( p0 - q0 ) < alpha &&
5938 FFABS( p1 - p0 ) < beta &&
5939 FFABS( q1 - q0 ) < beta ) {
5941 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5942 if( FFABS( p2 - p0 ) < beta)
5944 const int p3 = pix[-4];
// 3-tap strong smoothing of p0..p2.
5946 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5947 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5948 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5951 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5953 if( FFABS( q2 - q0 ) < beta)
5955 const int q3 = pix[3];
5957 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5958 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5959 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5962 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak fallback when the edge is not flat enough for strong filtering.
5966 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5967 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5969 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock the vertical left chroma edge of an MBAFF macroblock, one pixel
 * row at a time (8 rows). Chroma only touches p0/q0; the normal filter uses
 * tc0+1 as in filter_mb_edgecv, the strong (bS==4) filter uses the fixed
 * 2-tap smoothing.
 * NOTE(review): some original lines are elided in this listing
 * (non-contiguous numbering), e.g. the bS_index derivation.
 */
5974 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5976 for( i = 0; i < 8; i++, pix += stride) {
5984 if( bS[bS_index] == 0 ) {
// qp selection: field MBs split top/bottom half, frame MBs alternate rows.
5988 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5989 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5990 alpha = (alpha_table+52)[index_a];
5991 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal filter, bS in 1..3.
5993 if( bS[bS_index] < 4 ) {
5994 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
5995 const int p0 = pix[-1];
5996 const int p1 = pix[-2];
5997 const int q0 = pix[0];
5998 const int q1 = pix[1];
6000 if( FFABS( p0 - q0 ) < alpha &&
6001 FFABS( p1 - p0 ) < beta &&
6002 FFABS( q1 - q0 ) < beta ) {
6003 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6005 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6006 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6007 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong filter, bS == 4.
6010 const int p0 = pix[-1];
6011 const int p1 = pix[-2];
6012 const int q0 = pix[0];
6013 const int q1 = pix[1];
6015 if( FFABS( p0 - q0 ) < alpha &&
6016 FFABS( p1 - p0 ) < beta &&
6017 FFABS( q1 - q0 ) < beta ) {
6019 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6020 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6021 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblock one horizontal luma edge via the dsp v-loop-filter.
 * Mirror of filter_mb_edgev for the vertical filtering direction; same +52
 * table bias and tc convention (-1 = segment not filtered).
 */
6027 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6029 const int index_a = qp + h->slice_alpha_c0_offset;
6030 const int alpha = (alpha_table+52)[index_a];
6031 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6032 const int pix_next = stride;
6037 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6038 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6040 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Deblock one horizontal chroma edge via the dsp v-loop-filter.
 * Mirror of filter_mb_edgecv for the vertical filtering direction; chroma
 * uses tc0+1 and 0 as the "no filtering" marker.
 */
6044 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6046 const int index_a = qp + h->slice_alpha_c0_offset;
6047 const int alpha = (alpha_table+52)[index_a];
6048 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6053 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6054 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6056 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking for one macroblock: computes boundary strengths with
 * the dsp h264_loop_filter_strength helper (or fixed values for intra MBs)
 * instead of the general per-edge logic in filter_mb(). Falls back to
 * filter_mb() for the cases it cannot handle (first row/column, MBAFF,
 * per-plane chroma qp offsets, slice-boundary filtering mode 2).
 * NOTE(review): the tail of this function (and a few interior lines) is
 * elided in this listing — non-contiguous numbering.
 */
6060 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6061 MpegEncContext * const s = &h->s;
6062 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6064 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
// Bail out to the general filter for unsupported configurations.
6068 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6069 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6070 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6071 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6072 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6075 assert(!FRAME_MBAFF);
// Edge qps are the average of the two adjacent MBs' qps.
6077 mb_type = s->current_picture.mb_type[mb_xy];
6078 qp = s->current_picture.qscale_table[mb_xy];
6079 qp0 = s->current_picture.qscale_table[mb_xy-1];
6080 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6081 qpc = get_chroma_qp( h, 0, qp );
6082 qpc0 = get_chroma_qp( h, 0, qp0 );
6083 qpc1 = get_chroma_qp( h, 0, qp1 );
6084 qp0 = (qp + qp0 + 1) >> 1;
6085 qp1 = (qp + qp1 + 1) >> 1;
6086 qpc0 = (qpc + qpc0 + 1) >> 1;
6087 qpc1 = (qpc + qpc1 + 1) >> 1;
// Below this threshold alpha/beta are all zero -> nothing to filter.
6088 qp_thresh = 15 - h->slice_alpha_c0_offset;
6089 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6090 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra MB: fixed bS (4 on MB edges, 3 inside; 3 on the top edge of field
// pictures), no need to run the strength computation.
6093 if( IS_INTRA(mb_type) ) {
6094 int16_t bS4[4] = {4,4,4,4};
6095 int16_t bS3[4] = {3,3,3,3};
6096 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6097 if( IS_8x8DCT(mb_type) ) {
6098 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6099 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6100 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6101 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6103 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6104 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6105 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6106 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6107 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6108 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6109 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6110 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
// Chroma: only MB edge and middle edge exist (8x8 plane).
6112 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6113 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6114 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6115 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6116 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6117 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6118 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6119 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Inter MB: compute bS per edge; bSv aliases bS so whole 4-entry rows can
// be set/tested as one 64-bit value.
6122 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6123 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6125 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6127 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6129 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6130 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6131 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6132 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6134 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6135 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6136 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6137 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
// Intra neighbours force bS=4 (3 on the top edge of field pictures).
6139 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6140 bSv[0][0] = 0x0004000400040004ULL;
6141 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6142 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// Apply luma+chroma filters per edge; dir 0 = vertical, dir 1 = horizontal.
6144 #define FILTER(hv,dir,edge)\
6145 if(bSv[dir][edge]) {\
6146 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6148 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6149 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6155 } else if( IS_8x8DCT(mb_type) ) {
6175 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6176 MpegEncContext * const s = &h->s;
6178 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6179 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6180 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6181 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6182 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6184 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6185 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6186 // how often to recheck mv-based bS when iterating between edges
6187 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6188 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6189 // how often to recheck mv-based bS when iterating along each edge
6190 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6192 if (first_vertical_edge_done) {
6196 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6199 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6200 && !IS_INTERLACED(mb_type)
6201 && IS_INTERLACED(mbm_type)
6203 // This is a special case in the norm where the filtering must
6204 // be done twice (one each of the field) even if we are in a
6205 // frame macroblock.
6207 static const int nnz_idx[4] = {4,5,6,3};
6208 unsigned int tmp_linesize = 2 * linesize;
6209 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6210 int mbn_xy = mb_xy - 2 * s->mb_stride;
6215 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6216 if( IS_INTRA(mb_type) ||
6217 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6218 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6220 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6221 for( i = 0; i < 4; i++ ) {
6222 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6223 mbn_nnz[nnz_idx[i]] != 0 )
6229 // Do not use s->qscale as luma quantizer because it has not the same
6230 // value in IPCM macroblocks.
6231 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6232 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6233 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6234 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6235 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6236 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6237 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6238 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6245 for( edge = start; edge < edges; edge++ ) {
6246 /* mbn_xy: neighbor macroblock */
6247 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6248 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6249 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6253 if( (edge&1) && IS_8x8DCT(mb_type) )
6256 if( IS_INTRA(mb_type) ||
6257 IS_INTRA(mbn_type) ) {
6260 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6261 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6270 bS[0] = bS[1] = bS[2] = bS[3] = value;
6275 if( edge & mask_edge ) {
6276 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6279 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6280 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6283 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6284 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6285 int bn_idx= b_idx - (dir ? 8:1);
6288 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6289 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6290 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6291 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6294 if(h->slice_type_nos == FF_B_TYPE && v){
6296 for( l = 0; !v && l < 2; l++ ) {
6298 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6299 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6300 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6304 bS[0] = bS[1] = bS[2] = bS[3] = v;
6310 for( i = 0; i < 4; i++ ) {
6311 int x = dir == 0 ? edge : i;
6312 int y = dir == 0 ? i : edge;
6313 int b_idx= 8 + 4 + x + 8*y;
6314 int bn_idx= b_idx - (dir ? 8:1);
6316 if( h->non_zero_count_cache[b_idx] |
6317 h->non_zero_count_cache[bn_idx] ) {
6323 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6324 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6325 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6326 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6332 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6334 for( l = 0; l < 2; l++ ) {
6336 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6337 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6338 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6347 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6352 // Do not use s->qscale as luma quantizer because it has not the same
6353 // value in IPCM macroblocks.
6354 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6355 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6356 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6357 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6359 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6360 if( (edge&1) == 0 ) {
6361 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6362 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6363 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6364 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6367 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6368 if( (edge&1) == 0 ) {
6369 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6370 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6371 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6372 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Apply the in-loop deblocking filter to one decoded macroblock.
 *
 * Handles, in order: an early-out when QP is too low for the filter to
 * change anything, a CAVLC+8x8dct fixup of the non-zero-count cache, the
 * special MBAFF first vertical edge (8 boundary strengths, 2 QP pairs),
 * and finally delegates the regular edges to filter_mb_dir() for both
 * the vertical (dir=0) and horizontal (dir=1) directions.
 */
6378 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6379 MpegEncContext * const s = &h->s;
6380 const int mb_xy= mb_x + mb_y*s->mb_stride;
6381 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical MV threshold (2 instead of 4 in
 * quarter-pel units) when deciding whether an edge needs filtering. */
6382 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6383 int first_vertical_edge_done = 0;
6386 //for sufficiently low qp, filtering wouldn't do anything
6387 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6389 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6390 int qp = s->current_picture.qscale_table[mb_xy];
/* NOTE(review): the opening of this if() (the first condition before the
 * following && clauses) is truncated in this extract — verify against the
 * upstream file. The visible clauses compare the averaged QP of this MB
 * and its left/top neighbours against qp_thresh. */
6392 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6393 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6398 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6399 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6400 int top_type, left_type[2];
6401 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6402 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6403 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* For 8x8-transform neighbours the per-4x4 NNZ cache entries along the
 * shared edge are rebuilt from the neighbour's CBP bits instead. */
6405 if(IS_8x8DCT(top_type)){
6406 h->non_zero_count_cache[4+8*0]=
6407 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6408 h->non_zero_count_cache[6+8*0]=
6409 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6411 if(IS_8x8DCT(left_type[0])){
6412 h->non_zero_count_cache[3+8*1]=
6413 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6415 if(IS_8x8DCT(left_type[1])){
6416 h->non_zero_count_cache[3+8*3]=
6417 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* The current MB's own 16 entries: each 8x8 quadrant is flagged from the
 * corresponding luma CBP bit. */
6420 if(IS_8x8DCT(mb_type)){
6421 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6422 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6424 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6425 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6427 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6428 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6430 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6431 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
/* MBAFF special case: when the current and left MB pair differ in
 * field/frame coding, the first vertical edge needs 8 bS values and two
 * QP pairs and is filtered here instead of in filter_mb_dir(). */
6436 // left mb is in picture
6437 && h->slice_table[mb_xy-1] != 0xFFFF
6438 // and current and left pair do not have the same interlaced type
6439 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6440 // and left mb is in the same slice if deblocking_filter == 2
6441 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6442 /* First vertical edge is different in MBAFF frames
6443 * There are 8 different bS to compute and 2 different Qp
6445 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6446 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6451 int mb_qp, mbn0_qp, mbn1_qp;
6453 first_vertical_edge_done = 1;
6455 if( IS_INTRA(mb_type) )
6456 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6458 for( i = 0; i < 8; i++ ) {
6459 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6461 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
/* Boundary strength from coded-residual presence: the CAVLC 8x8dct case
 * has to consult the neighbour's CBP (see fixup above), otherwise the
 * stored per-4x4 non_zero_count of the neighbour is used. */
6463 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6464 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6465 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6467 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average QP of current MB with each of the two left MBs, separately for
 * luma (qp), Cb (bqp) and Cr (rqp). */
6474 mb_qp = s->current_picture.qscale_table[mb_xy];
6475 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6476 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6477 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6478 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6479 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6480 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6481 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6482 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6483 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6484 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6485 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6486 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6489 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6490 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6491 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6492 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6493 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Remaining edges: both directions via filter_mb_dir(); the MBAFF path
 * above suppresses the first vertical edge via first_vertical_edge_done. */
6497 for( dir = 0; dir < 2; dir++ )
6498 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6500 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6501 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode all macroblocks of one slice (thread entry point).
 *
 * @param avctx codec context
 * @param arg   pointer to an H264Context* (per-thread context)
 * @return 0 on normal slice end, -1 on error
 *
 * Three decode loops are visible: CABAC, CAVLC, and a data-partitioned
 * path. Each loop decodes MBs, advances mb_x/mb_y, reports decoded rows
 * via ff_draw_horiz_band(), and registers the decoded span with the
 * error concealment through ff_er_add_slice().
 */
6505 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6506 H264Context *h = *(void**)arg;
6507 MpegEncContext * const s = &h->s;
/* With partitioned frames only AC errors/ends are reported per partition. */
6508 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* is_complex selects the slower, fully general hl_decode_mb path. */
6512 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6513 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6515 if( h->pps.cabac ) {
/* CABAC: byte-align, then initialize the arithmetic decoder on the
 * remaining slice payload. */
6519 align_get_bits( &s->gb );
6522 ff_init_cabac_states( &h->cabac);
6523 ff_init_cabac_decoder( &h->cabac,
6524 s->gb.buffer + get_bits_count(&s->gb)/8,
6525 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6526 /* calculate pre-state */
/* Context-state init per spec: linear blend of the init constants with
 * the slice QP, clipped to [1,126], then split into (state, MPS) form. */
6527 for( i= 0; i < 460; i++ ) {
6529 if( h->slice_type_nos == FF_I_TYPE )
6530 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6532 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6535 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6537 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6542 int ret = decode_mb_cabac(h);
6544 //STOP_TIMER("decode_mb_cabac")
6546 if(ret>=0) hl_decode_mb(h);
/* MBAFF decodes the bottom MB of the pair immediately after the top. */
6548 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6551 if(ret>=0) ret = decode_mb_cabac(h);
6553 if(ret>=0) hl_decode_mb(h);
/* end_of_slice_flag, decoded as a CABAC terminate symbol. */
6556 eos = get_cabac_terminate( &h->cabac );
/* A small overread (up to 2 bytes) past bytestream_end is tolerated. */
6558 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6559 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6560 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6564 if( ++s->mb_x >= s->mb_width ) {
6566 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6568 if(FIELD_OR_MBAFF_PICTURE) {
6573 if( eos || s->mb_y >= s->mb_height ) {
6574 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6575 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* ---- CAVLC loop ---- */
6582 int ret = decode_mb_cavlc(h);
6584 if(ret>=0) hl_decode_mb(h);
6586 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6588 ret = decode_mb_cavlc(h);
6590 if(ret>=0) hl_decode_mb(h);
6595 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6596 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6601 if(++s->mb_x >= s->mb_width){
6603 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6605 if(FIELD_OR_MBAFF_PICTURE) {
6608 if(s->mb_y >= s->mb_height){
6609 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consuming the bitstream = clean end; otherwise treat as error. */
6611 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6612 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6616 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Mid-row slice end: only valid once any pending skip run is exhausted. */
6623 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6624 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6625 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6626 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6630 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* ---- data-partitioned loop ---- */
6639 for(;s->mb_y < s->mb_height; s->mb_y++){
6640 for(;s->mb_x < s->mb_width; s->mb_x++){
6641 int ret= decode_mb(h);
6646 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6647 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6652 if(++s->mb_x >= s->mb_width){
6654 if(++s->mb_y >= s->mb_height){
6655 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6656 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): "s->?gb" below is not valid C — in the upstream file this
 * line sits inside a disabled (#if 0) data-partitioning branch. Confirm
 * the surrounding preprocessor guards before assuming this compiles. */
6667 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6668 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6669 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6673 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6680 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6683 return -1; //not reached
/**
 * Parse a picture timing SEI message (H.264 spec, section D.1.2/D.2.2).
 *
 * Reads cpb_removal_delay/dpb_output_delay when HRD parameters are
 * present, then pic_struct and the optional per-timestamp clock fields.
 * Most fields are skipped; only h->sei_pic_struct is retained.
 * @return 0 on success (error paths elided in this extract)
 */
6686 static int decode_picture_timing(H264Context *h){
6687 MpegEncContext * const s = &h->s;
6688 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
/* Field widths come from the SPS HRD parameters parsed earlier. */
6689 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6690 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6692 if(h->sps.pic_struct_present_flag){
6693 unsigned int i, num_clock_ts;
6694 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Values above FRAME_TRIPLING are reserved by the spec. */
6696 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6699 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6701 for (i = 0 ; i < num_clock_ts ; i++){
6702 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6703 unsigned int full_timestamp_flag;
6704 skip_bits(&s->gb, 2); /* ct_type */
6705 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6706 skip_bits(&s->gb, 5); /* counting_type */
6707 full_timestamp_flag = get_bits(&s->gb, 1);
6708 skip_bits(&s->gb, 1); /* discontinuity_flag */
6709 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6710 skip_bits(&s->gb, 8); /* n_frames */
/* Either a full H:M:S timestamp, or a nested optional encoding. */
6711 if(full_timestamp_flag){
6712 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6713 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6714 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6716 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6717 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6718 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6719 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6720 if(get_bits(&s->gb, 1)) /* hours_flag */
6721 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6725 if(h->sps.time_offset_length > 0)
6726 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user data SEI message.
 *
 * Reads up to sizeof(user_data)-1 bytes, scans the payload (after the
 * 16-byte UUID) for an x264 version banner and stores the build number
 * in h->x264_build so known encoder bugs can be worked around. Any
 * remaining payload bytes are skipped.
 * @param size payload size in bytes
 */
6733 static int decode_unregistered_user_data(H264Context *h, int size){
6734 MpegEncContext * const s = &h->s;
6735 uint8_t user_data[16+256];
6741 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6742 user_data[i]= get_bits(&s->gb, 8);
/* user_data+16 skips the 16-byte uuid_iso_iec_11578 field. */
6746 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6747 if(e==1 && build>=0)
6748 h->x264_build= build;
6750 if(s->avctx->debug & FF_DEBUG_BUGS)
6751 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume whatever part of the payload did not fit in the buffer. */
6754 skip_bits(&s->gb, 8);
/**
 * Parse all SEI messages in the current NAL unit.
 *
 * Each message's type and size are coded as sequences of 0xFF bytes
 * plus a final byte (ff_byte accumulation per the spec). Recognized
 * payloads are dispatched; everything else is skipped by size.
 * @return 0 on success (error paths elided in this extract)
 */
6759 static int decode_sei(H264Context *h){
6760 MpegEncContext * const s = &h->s;
6762 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum 255 for each 0xFF byte, then add the final byte. */
6767 type+= show_bits(&s->gb, 8);
6768 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same accumulation scheme. */
6772 size+= show_bits(&s->gb, 8);
6773 }while(get_bits(&s->gb, 8) == 255);
6776 case 1: // Picture timing SEI
6777 if(decode_picture_timing(h) < 0)
6781 if(decode_unregistered_user_data(h, size) < 0)
6785 skip_bits(&s->gb, 8*size);
6788 //FIXME check bits here
6789 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters (spec E.1.2).
 *
 * Most per-CPB values are read and discarded; only the delay field
 * lengths and time_offset_length are kept in the SPS, since they are
 * needed later to parse picture timing SEI messages.
 * @return 0 on success, negative on invalid cpb_count
 */
6795 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6796 MpegEncContext * const s = &h->s;
6798 cpb_count = get_ue_golomb(&s->gb) + 1;
/* Spec limit: cpb_cnt_minus1 is 0..31. */
6800 if(cpb_count > 32U){
6801 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6805 get_bits(&s->gb, 4); /* bit_rate_scale */
6806 get_bits(&s->gb, 4); /* cpb_size_scale */
6807 for(i=0; i<cpb_count; i++){
6808 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6809 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6810 get_bits1(&s->gb); /* cbr_flag */
6812 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* Retained: these control SEI picture-timing field widths. */
6813 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6814 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6815 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse VUI (video usability information) parameters (spec E.1.1).
 *
 * Stores sample aspect ratio, timing info, HRD presence flags,
 * pic_struct_present_flag and bitstream restrictions (num_reorder_frames)
 * into the SPS; purely informative fields are read and discarded.
 * @return 0 on success (error returns elided in this extract)
 */
6819 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6820 MpegEncContext * const s = &h->s;
6821 int aspect_ratio_info_present_flag;
6822 unsigned int aspect_ratio_idc;
6824 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6826 if( aspect_ratio_info_present_flag ) {
6827 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16-bit num/den pair; other values
 * index a fixed table of predefined aspect ratios. */
6828 if( aspect_ratio_idc == EXTENDED_SAR ) {
6829 sps->sar.num= get_bits(&s->gb, 16);
6830 sps->sar.den= get_bits(&s->gb, 16);
6831 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6832 sps->sar= pixel_aspect[aspect_ratio_idc];
6834 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6841 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6843 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6844 get_bits1(&s->gb); /* overscan_appropriate_flag */
6847 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6848 get_bits(&s->gb, 3); /* video_format */
6849 get_bits1(&s->gb); /* video_full_range_flag */
6850 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6851 get_bits(&s->gb, 8); /* colour_primaries */
6852 get_bits(&s->gb, 8); /* transfer_characteristics */
6853 get_bits(&s->gb, 8); /* matrix_coefficients */
6857 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6858 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6859 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6862 sps->timing_info_present_flag = get_bits1(&s->gb);
6863 if(sps->timing_info_present_flag){
6864 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6865 sps->time_scale = get_bits_long(&s->gb, 32);
6866 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* Two independent HRD parameter sets may follow (NAL and VCL). */
6869 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6870 if(sps->nal_hrd_parameters_present_flag)
6871 if(decode_hrd_parameters(h, sps) < 0)
6873 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6874 if(sps->vcl_hrd_parameters_present_flag)
6875 if(decode_hrd_parameters(h, sps) < 0)
6877 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6878 get_bits1(&s->gb); /* low_delay_hrd_flag */
6879 sps->pic_struct_present_flag = get_bits1(&s->gb);
6881 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6882 if(sps->bitstream_restriction_flag){
6883 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6884 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6885 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6886 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6887 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames bounds output delay and is validated below. */
6888 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6889 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6891 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6892 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one quantization scaling list (spec 7.3.2.1.1.1).
 *
 * If the "scaling_list_present" bit is 0 the fallback list is copied.
 * Otherwise delta-coded values are read in zigzag order; an initial
 * delta producing 0 selects the JVT default list instead.
 *
 * @param factors       output array (size 16 or 64)
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec-default list used when the stream says so
 * @param fallback_list list used when this matrix is absent
 */
6900 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6901 const uint8_t *jvt_list, const uint8_t *fallback_list){
6902 MpegEncContext * const s = &h->s;
6903 int i, last = 8, next = 8;
6904 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6905 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6906 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6908 for(i=0;i<size;i++){
/* delta_scale is signed Exp-Golomb; values wrap modulo 256. */
6910 next = (last + get_se_golomb(&s->gb)) & 0xff;
6911 if(!i && !next){ /* matrix not written, we use the preset one */
6912 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* A zero delta repeats the previous coefficient (run-length behavior). */
6915 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 *
 * Fallback rules follow the spec: in a PPS, absent lists fall back to
 * the SPS lists if the SPS carried any, otherwise to the flat defaults;
 * within the set, each chroma list falls back to the previous list of
 * the same intra/inter class. 8x8 lists are only present for SPS or
 * when the PPS enables the 8x8 transform.
 *
 * @param is_sps non-zero when called for an SPS
 */
6919 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6920 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6921 MpegEncContext * const s = &h->s;
6922 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6923 const uint8_t *fallback[4] = {
6924 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6925 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6926 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6927 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
6929 if(get_bits1(&s->gb)){
6930 sps->scaling_matrix_present |= is_sps;
6931 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6932 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6933 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6934 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6935 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6936 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6937 if(is_sps || pps->transform_8x8_mode){
6938 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6939 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set NAL unit (spec 7.3.2.1).
 *
 * Allocates a new SPS, fills it from the bitstream (profile/level, POC
 * scheme, reference frame count, picture dimensions in MBs, MBAFF flag,
 * cropping, optional VUI), validates the ranges visible below, and
 * installs it in h->sps_buffers[sps_id] replacing any previous one.
 * @return 0 on success (error paths elided in this extract)
 */
6944 static inline int decode_seq_parameter_set(H264Context *h){
6945 MpegEncContext * const s = &h->s;
6946 int profile_idc, level_idc;
6947 unsigned int sps_id;
6951 profile_idc= get_bits(&s->gb, 8);
6952 get_bits1(&s->gb); //constraint_set0_flag
6953 get_bits1(&s->gb); //constraint_set1_flag
6954 get_bits1(&s->gb); //constraint_set2_flag
6955 get_bits1(&s->gb); //constraint_set3_flag
6956 get_bits(&s->gb, 4); // reserved
6957 level_idc= get_bits(&s->gb, 8);
6958 sps_id= get_ue_golomb(&s->gb);
6960 if(sps_id >= MAX_SPS_COUNT) {
6961 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6964 sps= av_mallocz(sizeof(SPS));
6968 sps->profile_idc= profile_idc;
6969 sps->level_idc= level_idc;
/* Default to flat (all-16) scaling matrices; high profile may override. */
6971 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6972 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6973 sps->scaling_matrix_present = 0;
6975 if(sps->profile_idc >= 100){ //high profile
6976 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6977 if(sps->chroma_format_idc == 3)
6978 get_bits1(&s->gb); //residual_color_transform_flag
6979 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6980 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6981 sps->transform_bypass = get_bits1(&s->gb);
6982 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
6984 sps->chroma_format_idc= 1;
6987 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6988 sps->poc_type= get_ue_golomb(&s->gb);
6990 if(sps->poc_type == 0){ //FIXME #define
6991 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6992 } else if(sps->poc_type == 1){//FIXME #define
6993 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6994 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6995 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6996 sps->poc_cycle_length = get_ue_golomb(&s->gb);
/* Bound check before filling the fixed-size offset table. */
6998 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6999 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7003 for(i=0; i<sps->poc_cycle_length; i++)
7004 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7005 }else if(sps->poc_type != 2){
7006 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7010 sps->ref_frame_count= get_ue_golomb(&s->gb);
7011 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7012 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7015 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* Dimensions are coded in macroblock units, minus 1. */
7016 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7017 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7018 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7019 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7020 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7024 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7025 if(!sps->frame_mbs_only_flag)
7026 sps->mb_aff= get_bits1(&s->gb);
7030 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7032 #ifndef ALLOW_INTERLACE
7034 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7036 sps->crop= get_bits1(&s->gb);
7038 sps->crop_left = get_ue_golomb(&s->gb);
7039 sps->crop_right = get_ue_golomb(&s->gb);
7040 sps->crop_top = get_ue_golomb(&s->gb);
7041 sps->crop_bottom= get_ue_golomb(&s->gb);
7042 if(sps->crop_left || sps->crop_top){
7043 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* Bottom crop limit halves for field coding (frame_mbs_only == 0). */
7045 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7046 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7052 sps->crop_bottom= 0;
7055 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7056 if( sps->vui_parameters_present_flag )
7057 decode_vui_parameters(h, sps);
7059 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7060 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7061 sps_id, sps->profile_idc, sps->level_idc,
7063 sps->ref_frame_count,
7064 sps->mb_width, sps->mb_height,
7065 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7066 sps->direct_8x8_inference_flag ? "8B8" : "",
7067 sps->crop_left, sps->crop_right,
7068 sps->crop_top, sps->crop_bottom,
7069 sps->vui_parameters_present_flag ? "VUI" : "",
7070 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7073 av_free(h->sps_buffers[sps_id]);
7074 h->sps_buffers[sps_id]= sps;
/**
 * Pre-compute one luma-QP -> chroma-QP lookup table for a PPS.
 *
 * @param t     table selector (0 = Cb offset, 1 = Cr offset)
 * @param index chroma_qp_index_offset to apply before clipping to [0,51]
 */
7082 build_qp_table(PPS *pps, int t, int index)
7085 for(i = 0; i < 52; i++)
7086 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set NAL unit (spec 7.3.2.2).
 *
 * Allocates a new PPS, validates pps_id/sps_id, parses entropy-coding
 * mode, slice groups (FMO — unsupported, only logged), reference counts,
 * weighting, QP offsets and the optional trailing extension (8x8
 * transform flag, PPS scaling matrices, second chroma QP offset), then
 * builds the chroma QP tables and installs the PPS in h->pps_buffers.
 * @param bit_length size of the RBSP in bits, used to detect the
 *                   presence of the optional extension fields
 * @return 0 on success (error paths elided in this extract)
 */
7089 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7090 MpegEncContext * const s = &h->s;
7091 unsigned int pps_id= get_ue_golomb(&s->gb);
7094 if(pps_id >= MAX_PPS_COUNT) {
7095 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7099 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been parsed. */
7102 pps->sps_id= get_ue_golomb(&s->gb);
7103 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7104 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7108 pps->cabac= get_bits1(&s->gb);
7109 pps->pic_order_present= get_bits1(&s->gb);
7110 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7111 if(pps->slice_group_count > 1 ){
7112 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7113 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* The syntax tables below are quoted from the spec for the unparsed FMO
 * map types; none of these fields are consumed by this decoder. */
7114 switch(pps->mb_slice_group_map_type){
7117 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7118 | run_length[ i ] |1 |ue(v) |
7123 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7125 | top_left_mb[ i ] |1 |ue(v) |
7126 | bottom_right_mb[ i ] |1 |ue(v) |
7134 | slice_group_change_direction_flag |1 |u(1) |
7135 | slice_group_change_rate_minus1 |1 |ue(v) |
7140 | slice_group_id_cnt_minus1 |1 |ue(v) |
7141 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7143 | slice_group_id[ i ] |1 |u(v) |
7148 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7149 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7150 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7151 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7155 pps->weighted_pred= get_bits1(&s->gb);
7156 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7157 pps->init_qp= get_se_golomb(&s->gb) + 26;
7158 pps->init_qs= get_se_golomb(&s->gb) + 26;
7159 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7160 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7161 pps->constrained_intra_pred= get_bits1(&s->gb);
7162 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7164 pps->transform_8x8_mode= 0;
7165 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS matrices; the PPS extension may override them. */
7166 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7167 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Extension fields exist only if bits remain in the RBSP. */
7169 if(get_bits_count(&s->gb) < bit_length){
7170 pps->transform_8x8_mode= get_bits1(&s->gb);
7171 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7172 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7174 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7177 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7178 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7179 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7180 h->pps.chroma_qp_diff= 1;
7182 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7183 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7184 pps_id, pps->sps_id,
7185 pps->cabac ? "CABAC" : "CAVLC",
7186 pps->slice_group_count,
7187 pps->ref_count[0], pps->ref_count[1],
7188 pps->weighted_pred ? "weighted" : "",
7189 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7190 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7191 pps->constrained_intra_pred ? "CONSTR" : "",
7192 pps->redundant_pic_cnt_present ? "REDU" : "",
7193 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7197 av_free(h->pps_buffers[pps_id]);
7198 h->pps_buffers[pps_id]= pps;
7206 * Call decode_slice() for each context.
7208 * @param h h264 master context
7209 * @param context_count number of contexts to execute
7211 static void execute_decode_slices(H264Context *h, int context_count){
7212 MpegEncContext * const s = &h->s;
7213 AVCodecContext * const avctx= s->avctx;
/* Single context: decode inline on this thread; otherwise fan out via
 * avctx->execute() and merge per-thread results back afterwards. */
7217 if(context_count == 1) {
7218 decode_slice(avctx, &h);
7220 for(i = 1; i < context_count; i++) {
7221 hx = h->thread_context[i];
7222 hx->s.error_recognition = avctx->error_recognition;
7223 hx->s.error_count = 0;
7226 avctx->execute(avctx, (void *)decode_slice,
7227 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7229 /* pull back stuff from slices to master context */
/* The last context holds the final decode position/state. */
7230 hx = h->thread_context[context_count - 1];
7231 s->mb_x = hx->s.mb_x;
7232 s->mb_y = hx->s.mb_y;
7233 s->dropable = hx->s.dropable;
7234 s->picture_structure = hx->s.picture_structure;
/* Error counts from all worker contexts are accumulated on the master. */
7235 for(i = 1; i < context_count; i++)
7236 h->s.error_count += h->thread_context[i]->s.error_count;
7241 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7242 MpegEncContext * const s = &h->s;
7243 AVCodecContext * const avctx= s->avctx;
7245 H264Context *hx; ///< thread context
7246 int context_count = 0;
7248 h->max_contexts = avctx->thread_count;
7251 for(i=0; i<50; i++){
7252 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7255 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7256 h->current_slice = 0;
7257 if (!s->first_field)
7258 s->current_picture_ptr= NULL;
7270 if(buf_index >= buf_size) break;
7272 for(i = 0; i < h->nal_length_size; i++)
7273 nalsize = (nalsize << 8) | buf[buf_index++];
7274 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7279 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7284 // start code prefix search
7285 for(; buf_index + 3 < buf_size; buf_index++){
7286 // This should always succeed in the first iteration.
7287 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7291 if(buf_index+3 >= buf_size) break;
7296 hx = h->thread_context[context_count];
7298 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7299 if (ptr==NULL || dst_length < 0){
7302 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7304 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7306 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7307 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7310 if (h->is_avc && (nalsize != consumed)){
7311 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7315 buf_index += consumed;
7317 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7318 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7323 switch(hx->nal_unit_type){
7325 if (h->nal_unit_type != NAL_IDR_SLICE) {
7326 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7329 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7331 init_get_bits(&hx->s.gb, ptr, bit_length);
7333 hx->inter_gb_ptr= &hx->s.gb;
7334 hx->s.data_partitioning = 0;
7336 if((err = decode_slice_header(hx, h)))
7339 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7340 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7341 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7342 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7343 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7344 && avctx->skip_frame < AVDISCARD_ALL)
7348 init_get_bits(&hx->s.gb, ptr, bit_length);
7350 hx->inter_gb_ptr= NULL;
7351 hx->s.data_partitioning = 1;
7353 err = decode_slice_header(hx, h);
7356 init_get_bits(&hx->intra_gb, ptr, bit_length);
7357 hx->intra_gb_ptr= &hx->intra_gb;
7360 init_get_bits(&hx->inter_gb, ptr, bit_length);
7361 hx->inter_gb_ptr= &hx->inter_gb;
7363 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7364 && s->context_initialized
7366 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7367 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7368 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7369 && avctx->skip_frame < AVDISCARD_ALL)
7373 init_get_bits(&s->gb, ptr, bit_length);
7377 init_get_bits(&s->gb, ptr, bit_length);
7378 decode_seq_parameter_set(h);
7380 if(s->flags& CODEC_FLAG_LOW_DELAY)
7383 if(avctx->has_b_frames < 2)
7384 avctx->has_b_frames= !s->low_delay;
7387 init_get_bits(&s->gb, ptr, bit_length);
7389 decode_picture_parameter_set(h, bit_length);
7393 case NAL_END_SEQUENCE:
7394 case NAL_END_STREAM:
7395 case NAL_FILLER_DATA:
7397 case NAL_AUXILIARY_SLICE:
7400 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7403 if(context_count == h->max_contexts) {
7404 execute_decode_slices(h, context_count);
7409 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7411 /* Slice could not be decoded in parallel mode, copy down
7412 * NAL unit stuff to context 0 and restart. Note that
7413 * rbsp_buffer is not transferred, but since we no longer
7414 * run in parallel mode this should not be an issue. */
7415 h->nal_unit_type = hx->nal_unit_type;
7416 h->nal_ref_idc = hx->nal_ref_idc;
7422 execute_decode_slices(h, context_count);
7427 * returns the number of bytes consumed for building the current frame
7429 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7430 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7431 if(pos+10>buf_size) pos=buf_size; // oops ;)
7436 static int decode_frame(AVCodecContext *avctx,
7437 void *data, int *data_size,
7438 const uint8_t *buf, int buf_size)
7440 H264Context *h = avctx->priv_data;
7441 MpegEncContext *s = &h->s;
7442 AVFrame *pict = data;
7445 s->flags= avctx->flags;
7446 s->flags2= avctx->flags2;
7448 /* end of stream, output what is still in the buffers */
7449 if (buf_size == 0) {
7453 //FIXME factorize this with the output code below
7454 out = h->delayed_pic[0];
7456 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7457 if(h->delayed_pic[i]->poc < out->poc){
7458 out = h->delayed_pic[i];
7462 for(i=out_idx; h->delayed_pic[i]; i++)
7463 h->delayed_pic[i] = h->delayed_pic[i+1];
7466 *data_size = sizeof(AVFrame);
7467 *pict= *(AVFrame*)out;
7473 if(h->is_avc && !h->got_avcC) {
7474 int i, cnt, nalsize;
7475 unsigned char *p = avctx->extradata;
7476 if(avctx->extradata_size < 7) {
7477 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7481 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7484 /* sps and pps in the avcC always have length coded with 2 bytes,
7485 so put a fake nal_length_size = 2 while parsing them */
7486 h->nal_length_size = 2;
7487 // Decode sps from avcC
7488 cnt = *(p+5) & 0x1f; // Number of sps
7490 for (i = 0; i < cnt; i++) {
7491 nalsize = AV_RB16(p) + 2;
7492 if(decode_nal_units(h, p, nalsize) < 0) {
7493 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7498 // Decode pps from avcC
7499 cnt = *(p++); // Number of pps
7500 for (i = 0; i < cnt; i++) {
7501 nalsize = AV_RB16(p) + 2;
7502 if(decode_nal_units(h, p, nalsize) != nalsize) {
7503 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7508 // Now store right nal length size, that will be use to parse all other nals
7509 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7510 // Do not reparse avcC
7514 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7515 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7520 buf_index=decode_nal_units(h, buf, buf_size);
7524 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7525 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7526 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7530 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7531 Picture *out = s->current_picture_ptr;
7532 Picture *cur = s->current_picture_ptr;
7533 int i, pics, cross_idr, out_of_order, out_idx;
7537 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7538 s->current_picture_ptr->pict_type= s->pict_type;
7541 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7542 h->prev_poc_msb= h->poc_msb;
7543 h->prev_poc_lsb= h->poc_lsb;
7545 h->prev_frame_num_offset= h->frame_num_offset;
7546 h->prev_frame_num= h->frame_num;
7549 * FIXME: Error handling code does not seem to support interlaced
7550 * when slices span multiple rows
7551 * The ff_er_add_slice calls don't work right for bottom
7552 * fields; they cause massive erroneous error concealing
7553 * Error marking covers both fields (top and bottom).
7554 * This causes a mismatched s->error_count
7555 * and a bad error table. Further, the error count goes to
7556 * INT_MAX when called for bottom field, because mb_y is
7557 * past end by one (callers fault) and resync_mb_y != 0
7558 * causes problems for the first MB line, too.
7565 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7566 /* Wait for second field. */
7570 cur->repeat_pict = 0;
7572 /* Signal interlacing information externally. */
7573 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7574 if(h->sps.pic_struct_present_flag){
7575 switch (h->sei_pic_struct)
7577 case SEI_PIC_STRUCT_FRAME:
7578 cur->interlaced_frame = 0;
7580 case SEI_PIC_STRUCT_TOP_FIELD:
7581 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7582 case SEI_PIC_STRUCT_TOP_BOTTOM:
7583 case SEI_PIC_STRUCT_BOTTOM_TOP:
7584 cur->interlaced_frame = 1;
7586 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7587 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7588 // Signal the possibility of telecined film externally (pic_struct 5,6)
7589 // From these hints, let the applications decide if they apply deinterlacing.
7590 cur->repeat_pict = 1;
7591 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7593 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7594 // Force progressive here, as doubling interlaced frame is a bad idea.
7595 cur->interlaced_frame = 0;
7596 cur->repeat_pict = 2;
7598 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7599 cur->interlaced_frame = 0;
7600 cur->repeat_pict = 4;
7604 /* Derive interlacing flag from used decoding process. */
7605 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7608 if (cur->field_poc[0] != cur->field_poc[1]){
7609 /* Derive top_field_first from field pocs. */
7610 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7612 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7613 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7614 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7615 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7616 cur->top_field_first = 1;
7618 cur->top_field_first = 0;
7620 /* Most likely progressive */
7621 cur->top_field_first = 0;
7625 //FIXME do something with unavailable reference frames
7627 /* Sort B-frames into display order */
7629 if(h->sps.bitstream_restriction_flag
7630 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7631 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7635 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7636 && !h->sps.bitstream_restriction_flag){
7637 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7642 while(h->delayed_pic[pics]) pics++;
7644 assert(pics <= MAX_DELAYED_PIC_COUNT);
7646 h->delayed_pic[pics++] = cur;
7647 if(cur->reference == 0)
7648 cur->reference = DELAYED_PIC_REF;
7650 out = h->delayed_pic[0];
7652 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7653 if(h->delayed_pic[i]->poc < out->poc){
7654 out = h->delayed_pic[i];
7657 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7659 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7661 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7663 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7665 ((!cross_idr && out->poc > h->outputed_poc + 2)
7666 || cur->pict_type == FF_B_TYPE)))
7669 s->avctx->has_b_frames++;
7672 if(out_of_order || pics > s->avctx->has_b_frames){
7673 out->reference &= ~DELAYED_PIC_REF;
7674 for(i=out_idx; h->delayed_pic[i]; i++)
7675 h->delayed_pic[i] = h->delayed_pic[i+1];
7677 if(!out_of_order && pics > s->avctx->has_b_frames){
7678 *data_size = sizeof(AVFrame);
7680 h->outputed_poc = out->poc;
7681 *pict= *(AVFrame*)out;
7683 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7688 assert(pict->data[0] || !*data_size);
7689 ff_print_debug_info(s, pict);
7690 //printf("out %d\n", (int)pict->data[0]);
7693 /* Return the Picture timestamp as the frame number */
7694 /* we subtract 1 because it is added on utils.c */
7695 avctx->frame_number = s->picture_number - 1;
7697 return get_consumed_bytes(s, buf_index, buf_size);
7700 static inline void fill_mb_avail(H264Context *h){
7701 MpegEncContext * const s = &h->s;
7702 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7705 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7706 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7707 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7713 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7714 h->mb_avail[4]= 1; //FIXME move out
7715 h->mb_avail[5]= 0; //FIXME move out
7723 #define SIZE (COUNT*40)
7729 // int int_temp[10000];
7731 AVCodecContext avctx;
7733 dsputil_init(&dsp, &avctx);
7735 init_put_bits(&pb, temp, SIZE);
7736 printf("testing unsigned exp golomb\n");
7737 for(i=0; i<COUNT; i++){
7739 set_ue_golomb(&pb, i);
7740 STOP_TIMER("set_ue_golomb");
7742 flush_put_bits(&pb);
7744 init_get_bits(&gb, temp, 8*SIZE);
7745 for(i=0; i<COUNT; i++){
7748 s= show_bits(&gb, 24);
7751 j= get_ue_golomb(&gb);
7753 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7756 STOP_TIMER("get_ue_golomb");
7760 init_put_bits(&pb, temp, SIZE);
7761 printf("testing signed exp golomb\n");
7762 for(i=0; i<COUNT; i++){
7764 set_se_golomb(&pb, i - COUNT/2);
7765 STOP_TIMER("set_se_golomb");
7767 flush_put_bits(&pb);
7769 init_get_bits(&gb, temp, 8*SIZE);
7770 for(i=0; i<COUNT; i++){
7773 s= show_bits(&gb, 24);
7776 j= get_se_golomb(&gb);
7777 if(j != i - COUNT/2){
7778 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7781 STOP_TIMER("get_se_golomb");
7785 printf("testing 4x4 (I)DCT\n");
7788 uint8_t src[16], ref[16];
7789 uint64_t error= 0, max_error=0;
7791 for(i=0; i<COUNT; i++){
7793 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7794 for(j=0; j<16; j++){
7795 ref[j]= random()%255;
7796 src[j]= random()%255;
7799 h264_diff_dct_c(block, src, ref, 4);
7802 for(j=0; j<16; j++){
7803 // printf("%d ", block[j]);
7804 block[j]= block[j]*4;
7805 if(j&1) block[j]= (block[j]*4 + 2)/5;
7806 if(j&4) block[j]= (block[j]*4 + 2)/5;
7810 s->dsp.h264_idct_add(ref, block, 4);
7811 /* for(j=0; j<16; j++){
7812 printf("%d ", ref[j]);
7816 for(j=0; j<16; j++){
7817 int diff= FFABS(src[j] - ref[j]);
7820 max_error= FFMAX(max_error, diff);
7823 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7824 printf("testing quantizer\n");
7825 for(qp=0; qp<52; qp++){
7827 src1_block[i]= src2_block[i]= random()%255;
7830 printf("Testing NAL layer\n");
7832 uint8_t bitstream[COUNT];
7833 uint8_t nal[COUNT*2];
7835 memset(&h, 0, sizeof(H264Context));
7837 for(i=0; i<COUNT; i++){
7845 for(j=0; j<COUNT; j++){
7846 bitstream[j]= (random() % 255) + 1;
7849 for(j=0; j<zeros; j++){
7850 int pos= random() % COUNT;
7851 while(bitstream[pos] == 0){
7860 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7862 printf("encoding failed\n");
7866 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7870 if(out_length != COUNT){
7871 printf("incorrect length %d %d\n", out_length, COUNT);
7875 if(consumed != nal_length){
7876 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7880 if(memcmp(bitstream, out, COUNT)){
7881 printf("mismatch\n");
7887 printf("Testing RBSP\n");
7895 static av_cold int decode_end(AVCodecContext *avctx)
7897 H264Context *h = avctx->priv_data;
7898 MpegEncContext *s = &h->s;
7901 av_freep(&h->rbsp_buffer[0]);
7902 av_freep(&h->rbsp_buffer[1]);
7903 free_tables(h); //FIXME cleanup init stuff perhaps
7905 for(i = 0; i < MAX_SPS_COUNT; i++)
7906 av_freep(h->sps_buffers + i);
7908 for(i = 0; i < MAX_PPS_COUNT; i++)
7909 av_freep(h->pps_buffers + i);
7913 // memset(h, 0, sizeof(H264Context));
7919 AVCodec h264_decoder = {
7923 sizeof(H264Context),
7928 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7930 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),