2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC lookup tables: coeff_token, chroma-DC coeff_token, total_zeros,
 * run_before. The VLC structs point into the statically sized _tables
 * arrays; the *_size constants record each table's allocation size.
 * NOTE(review): the embedded numbers skip (e.g. 70-71 absent), so some
 * declarations (run7_vlc itself) are missing from this dump. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in the file. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* qp % 6 for qp in 0..51, used to select the dequant scale row without a
 * runtime modulo.  Fix: restored the closing "};" that this dump had lost
 * (original line 91) and removed the embedded line numbers. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp / 6 for qp in 0..51, used to select the dequant shift without a
 * runtime division.  Fix: restored the closing "};" that this dump had lost
 * (original line 95) and removed the embedded line numbers. */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/* Remapping tables for the left neighbour's 4x4 block indices; which of the
 * four rows is used depends on the MBAFF frame/field pairing (selected in
 * fill_caches).  NOTE(review): the four 8-entry initializer rows and the
 * closing brace (original lines 98-103) are missing from this dump — restore
 * from upstream h264.c before use. */
97 static const int left_block_options[4][8]={
/**
 * fill_caches(): loads the neighbouring macroblocks' state (intra prediction
 * modes, non-zero counts, cbp, motion vectors / reference indices, mvd,
 * direct flags) into the per-MB caches before decoding or deblocking the
 * current macroblock (h->mb_xy).  for_deblock selects the relaxed
 * availability rules used by the loop filter.
 * NOTE(review): this dump is missing many original lines (the embedded
 * numbering skips, e.g. 111-112, 114, 117-118); comments below only describe
 * what is visible, and the function's closing brace is among the lost lines.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* Plain (non-MBAFF) neighbour indices: simple grid offsets. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: neighbours depend on the frame/field flag of each MB pair. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblock path: neighbours from other slices still count (slice_table
 * compared against 0xFFFF rather than the current slice number). */
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: neighbours only available inside the current slice. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample-availability bitmasks; constrained_intra_pred
 * additionally masks out inter neighbours via type_mask. */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction mode cache from the top and left neighbours. */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero coefficient count cache for CAVLC/CABAC context. */
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector and reference index caches for inter/direct MBs. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC motion vector difference cache. */
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* Direct-mode flag cache for B slices. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of the cached neighbour mvs/refs: MAP_F2F
 * is defined twice (frame->field then field->frame) and applied to each
 * neighbour cache position via the list below. */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates the cached intra4x4 prediction modes against the available
 * neighbour samples: modes that need unavailable top/left samples are
 * remapped (via the top[]/left[] tables) to a legal DC variant, and an
 * illegal combination is reported with av_log.
 * NOTE(review): several original lines (574-575, 577, 579, 581-587, 590,
 * 593, 595-596, 598-603 — the for loops, error returns and the final
 * "return 0") are missing from this dump.
 */
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates a 16x16-luma / chroma intra prediction mode against the
 * available neighbour samples, remapping DC modes when the top or left
 * samples are missing (including the MBAFF + constrained_intra_pred case
 * handled via ALZHEIMER_DC_L0T_PRED8x8).
 * NOTE(review): this dump is missing the mode range check, the error
 * returns and the final "return mode;" (original lines 613-614, 616-618,
 * 620-621, 623-626, 628, 631-632, 634-639).
 */
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
627 if((h->left_samples_available&0x8080) != 0x8080){
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right ("diagonal") neighbour MV and reference index for
 * MV prediction, writing the result through *C and returning the reference.
 * Falls back to the top-left neighbour when top-right is unavailable.  The
 * MBAFF paths rescale field/frame MVs via SET_DIAG_MV (y-mv and ref index
 * halved or doubled to convert between frame and field coordinates).
 * NOTE(review): this dump is missing several lines (696, 699, 701, 704-705,
 * 718, 720-721, 725, 728-729, 731, 734-738, 741-742, 744, 747-748 — among
 * them the MB_FIELD/FRAME_MBAFF condition heads, #undef SET_DIAG_MV and the
 * closing braces).
 */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF fallback: take the cached top-right, else top-left. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
/**
 * Computes the median MV predictor for block n of the given partition
 * width: median of left (A), top (B) and diagonal (C) neighbour MVs, with
 * the single-match and nothing-available special cases of the H.264 spec.
 * Results are written to *mx / *my.
 * NOTE(review): this dump is missing lines 763, 765, 767-775 (the C
 * pointer declaration and the top-unavailable early path), 783-785,
 * 787-793, 795-797 and 800-802 (the single-match branches and closing
 * braces).
 */
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* No neighbour matches ref: median of all three unless only left exists. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
/**
 * Directional MV predictor for 16x8 partitions: the top partition prefers
 * the top neighbour's MV, the bottom partition prefers the left
 * neighbour's, falling back to the generic median predictor otherwise.
 * NOTE(review): this dump is missing lines 813, 816, 818-824, 827,
 * 829-837 and 839-840 (the n==0/else split, the ref-match early returns
 * and the closing brace).
 */
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Fallback: generic median prediction. */
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
/**
 * Directional MV predictor for 8x16 partitions: the left partition prefers
 * the left neighbour's MV, the right partition prefers the diagonal
 * (top-right) neighbour's, falling back to the generic median predictor.
 * NOTE(review): this dump is missing lines 848, 851, 853-862, 864, 866,
 * 868-874 and 876-877 (the n==0/else split, the ref-match early returns
 * and the closing brace).
 */
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* Fallback: generic median prediction. */
875 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV predictor for P-Skip macroblocks: zero MV when either the top or left
 * neighbour is unavailable or is a zero-MV ref-0 block (the H.264 P_Skip
 * condition), otherwise the regular 16x16 median prediction.
 * NOTE(review): this dump is missing lines 881, 883, 887-891 (the
 * "*mx = *my = 0; return;" body of the skip condition) and 893-895
 * (closing braces).
 */
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills h->dist_scale_factor[] (and, in the MBAFF/field branch, the
 * per-field dist_scale_factor_field[][]) with temporal-direct scale
 * factors for every list-0 reference, using get_scale_factor().
 * NOTE(review): this dump is missing lines 913 (the FRAME_MBAFF condition
 * opening the field loop), 919-920 and 923-924 (closing braces).
 */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds map[list][]: for each reference of the colocated picture
 * (h->ref_list[1][0]), the index of the matching picture in the current
 * slice's reference list, matched by POC (4*frame_num + reference&3).
 * Used by temporal direct mode; mbafi selects the MBAFF field variant.
 * NOTE(review): this dump is missing lines 933, 936, 940-942, 945 (a POC
 * adjustment branch head), 950, 952-958 (loop/brace closings and the
 * non-MBAFF map assignment context).
 */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Per-slice setup for B-direct prediction: records the current picture's
 * reference counts and ref POC keys (4*frame_num + parity) into the
 * current Picture (so future pictures can use it as a colocated ref),
 * then builds the col->list0 remap tables via fill_colmap().
 * Returns early for non-B slices / spatial direct (no temporal maps needed).
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960     MpegEncContext * const s = &h->s;
961     Picture * const ref1 = &h->ref_list[1][0];
962     Picture * const cur = s->current_picture_ptr;
// sidx/ref1sidx: 0 for top/frame, 1 for bottom — selects which field slot
// of the ref_count/ref_poc arrays to fill or read
964     int sidx= (s->picture_structure&1)^1;
965     int ref1sidx= (ref1->reference&1)^1;
967     for(list=0; list<2; list++){
968         cur->ref_count[sidx][list] = h->ref_count[list];
969         for(j=0; j<h->ref_count[list]; j++)
970             cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// progressive frame: both field slots get identical data
973     if(s->picture_structure == PICT_FRAME){
974         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975         memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978     cur->mbaff= FRAME_MBAFF;
// only temporal direct in B slices needs the colmaps
980     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983     for(list=0; list<2; list++){
984         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985         for(field=0; field<2; field++)
986             fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-direct macroblock
 * (or the direct 8x8 sub-blocks of a B_8x8 MB) and writes them into
 * h->mv_cache / h->ref_cache. Two derivation modes per H.264 8.4.1.2:
 *  - spatial  (h->direct_spatial_mv_pred): MVs predicted from spatial
 *    neighbours, zeroed where the colocated block is static;
 *  - temporal: colocated list1 MVs scaled by dist_scale_factor.
 * Also updates *mb_type / h->sub_mb_type with the derived partitioning.
 * The first section remaps mb_xy between frame/field colocated pictures
 * for all PAFF/MBAFF combinations (comments like "AFR/FR -> AFL/FL"
 * describe cur -> colocated coding-structure transitions).
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991     MpegEncContext * const s = &h->s;
992     int b8_stride = h->b8_stride;
993     int b4_stride = h->b_stride;
994     int mb_xy = h->mb_xy;
996     const int16_t (*l1mv0)[2], (*l1mv1)[2];
997     const int8_t *l1ref0, *l1ref1;
998     const int is_b8x8 = IS_8X8(*mb_type);
999     unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- locate the colocated MB and decide the derived partition sizes ---
1004     if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005         if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
// pick the colocated field whose POC is closest to the current picture
1006             int cur_poc = s->current_picture_ptr->poc;
1007             int *col_poc = h->ref_list[1]->field_poc;
1008             int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009             mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011         }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
// opposite-parity field pair: step one MB row up or down
1012             int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013             mb_xy += s->mb_stride*fieldoff;
1016     }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1017         if(IS_INTERLACED(*mb_type)){                     // AFL /FL -> AFR/FR
// one field MB covers a frame MB pair: take both rows of the pair
1018             mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023             //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024             if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025                 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028                 *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033         }else{                                           //     AFR/FR    -> AFR/FR
1036             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037             if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038                 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039                 * so we know exactly what block size to use */
1040                 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042             }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044                 *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046                 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047                 *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
// pointers into the colocated picture's MV / ref-index planes at mb_xy
1052     l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053     l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054     l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055     l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// advance to the bottom half of the colocated MB (field/frame mismatch case)
1058         l1ref0 += h->b8_stride;
1059         l1ref1 += h->b8_stride;
1060         l1mv0  +=  2*b4_stride;
1061         l1mv1  +=  2*b4_stride;
// --- spatial direct prediction ---
1065     if(h->direct_spatial_mv_pred){
1070         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072         /* ref = min(neighbors) */
1073         for(list=0; list<2; list++){
1074             int refa = h->ref_cache[list][scan8[0] - 1];
1075             int refb = h->ref_cache[list][scan8[0] - 8];
1076             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077             if(refc == PART_NOT_AVAILABLE)
1078                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare makes negative (unavailable) refs largest, so the min
// picks the smallest valid reference index
1079             ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// no valid neighbour in either list: zero refs and zero MVs
1084         if(ref[0] < 0 && ref[1] < 0){
1085             ref[0] = ref[1] = 0;
1086             mv[0][0] = mv[0][1] =
1087             mv[1][0] = mv[1][1] = 0;
1089             for(list=0; list<2; list++){
1091                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093                     mv[list][0] = mv[list][1] = 0;
// a list without a valid ref contributes no prediction direction
1099             *mb_type &= ~MB_TYPE_L1;
1100             sub_mb_type &= ~MB_TYPE_L1;
1101         }else if(ref[0] < 0){
1103             *mb_type &= ~MB_TYPE_L0;
1104             sub_mb_type &= ~MB_TYPE_L0;
// field/frame mismatch with the colocated MB: per-8x8 handling, zeroing
// MVs where the colocated block is (nearly) static (|mv| <= 1)
1107         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108             for(i8=0; i8<4; i8++){
1111                 int xy8 = x8+y8*b8_stride;
1112                 int xy4 = 3*x8+y8*b4_stride;
1115                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117                 h->sub_mb_type[i8] = sub_mb_type;
1119                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1121                 if(!IS_INTRA(mb_type_col[y8])
1122                     && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123                         || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125                         a= pack16to32(mv[0][0],mv[0][1]);
1127                         b= pack16to32(mv[1][0],mv[1][1]);
1129                     a= pack16to32(mv[0][0],mv[0][1]);
1130                     b= pack16to32(mv[1][0],mv[1][1]);
1132                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// whole-MB 16x16 spatial direct
1135         }else if(IS_16X16(*mb_type)){
1138             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140             if(!IS_INTRA(mb_type_col[0])
1141                 && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142                     || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a different (buggy) colocated check; mirror it
1143                         && (h->x264_build>33 || !h->x264_build)))){
1145                     a= pack16to32(mv[0][0],mv[0][1]);
1147                     b= pack16to32(mv[1][0],mv[1][1]);
1149                 a= pack16to32(mv[0][0],mv[0][1]);
1150                 b= pack16to32(mv[1][0],mv[1][1]);
1152             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// per-8x8 spatial direct (same parity as colocated)
1155             for(i8=0; i8<4; i8++){
1156                 const int x8 = i8&1;
1157                 const int y8 = i8>>1;
1159                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161                 h->sub_mb_type[i8] = sub_mb_type;
1163                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1169                 if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1170                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171                                                     && (h->x264_build>33 || !h->x264_build)))){
1172                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173                     if(IS_SUB_8X8(sub_mb_type)){
1174                         const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182                     for(i4=0; i4<4; i4++){
1183                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186                                 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188                                 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// --- temporal direct prediction: scale colocated list1 MVs ---
1194     }else{ /* direct temporal mv pred */
1195         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196         const int *dist_scale_factor = h->dist_scale_factor;
// MBAFF field MB: use the per-field remap and scale tables
1199         if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200             map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201             map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202             dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1204         if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
// frame/field mismatch: 8x8 blocks with vertical MV rescale by y_shift
1207         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208             /* FIXME assumes direct_8x8_inference == 1 */
1209             int y_shift  = 2*!IS_INTERLACED(*mb_type);
1211             for(i8=0; i8<4; i8++){
1212                 const int x8 = i8&1;
1213                 const int y8 = i8>>1;
1215                 const int16_t (*l1mv)[2]= l1mv0;
1217                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219                 h->sub_mb_type[i8] = sub_mb_type;
1221                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// intra colocated block: direct MV is (0,0) with ref 0
1222                 if(IS_INTRA(mb_type_col[y8])){
1223                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229                 ref0 = l1ref0[x8 + y8*b8_stride];
1231                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236                 scale = dist_scale_factor[ref0];
1237                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240                     const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
// rescale vertical component between field and frame coordinates
1241                     int my_col = (mv_col[1]<<y_shift)/2;
1242                     int mx = (scale * mv_col[0] + 128) >> 8;
1243                     int my = (scale * my_col + 128) >> 8;
1244                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
// list1 MV = list0 MV - colocated MV (spec 8.4.1.2.3)
1245                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251         /* one-to-one mv scaling */
1253         if(IS_16X16(*mb_type)){
1256             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257             if(IS_INTRA(mb_type_col[0])){
1260                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262                 const int scale = dist_scale_factor[ref0];
1263                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// per-8x8 temporal direct
1275             for(i8=0; i8<4; i8++){
1276                 const int x8 = i8&1;
1277                 const int y8 = i8>>1;
1279                 const int16_t (*l1mv)[2]= l1mv0;
1281                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283                 h->sub_mb_type[i8] = sub_mb_type;
1284                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285                 if(IS_INTRA(mb_type_col[0])){
1286                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292                 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294                     ref0 = map_col_to_list0[0][ref0];
1296                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299                 scale = dist_scale_factor[ref0];
1301                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302                 if(IS_SUB_8X8(sub_mb_type)){
1303                     const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304                     int mx = (scale * mv_col[0] + 128) >> 8;
1305                     int my = (scale * mv_col[1] + 128) >> 8;
1306                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
// 4x4 granularity (direct_8x8_inference == 0)
1309                     for(i4=0; i4<4; i4++){
1310                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314                         *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315                             pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (h->mv_cache,
 * h->ref_cache, h->mvd_cache) back into the frame-wide arrays of the
 * current picture (motion_val, ref_index, mvd_table, direct_table).
 * Rows are copied as two 64-bit stores (four 16-bit MV components each).
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323     MpegEncContext * const s = &h->s;
// b_xy / b8_xy: this MB's origin in 4x4- and 8x8-block units
1324     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// mark list0 unused so the deblock/mvpred of later MBs sees a clean state
1328         if(!USES_LIST(mb_type, 0))
1329             fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331     for(list=0; list<h->list_count; list++){
1333         if(!USES_LIST(mb_type, list))
1337             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC needs the MV differences of neighbours for context modelling
1340         if( h->pps.cabac ) {
1341             if(IS_SKIP(mb_type))
1342                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345                 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346                 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one reference index per 8x8 block
1351             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// record which 8x8 sub-blocks used direct mode (CABAC context for B slices)
1359     if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360         if(IS_8X8(mb_type)){
1361             uint8_t *direct_table = &h->direct_table[b8_xy];
1362             direct_table[1+0*h->b8_stride]= IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363             direct_table[0+1*h->b8_stride]= IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364             direct_table[1+1*h->b8_stride]= IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370  * Decodes a network abstraction layer unit.
1371  * @param consumed is the number of input bytes consumed (including the NAL header byte)
1372  * @param length is the length of the input array
1373  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374  * @returns a pointer to the decoded bytes; may be src+1 itself when the
 *           payload contains no emulation-prevention escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
// first byte is the NAL header: forbidden bit, nal_ref_idc, nal_unit_type
1381 //    src[0]&0x80;              //forbidden bit
1382     h->nal_ref_idc= src[0]>>5;
1383     h->nal_unit_type= src[0]&0x1F;
1387     for(i=0; i<length; i++)
1388         printf("%2X ", src[i]);
// scan for 00 00 (00..03) sequences, i.e. potential escapes/start codes;
// stepping by 2 is safe because a match needs two consecutive zero bytes
1390     for(i=0; i+1<length; i+=2){
1391         if(src[i]) continue;
1392         if(i>0 && src[i-1]==0) i--;
1393         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395                 /* startcode, so we must be past the end */
// fast path: no escapes found, the input can be used in place
1402     if(i>=length-1){ //no escaped 0
1403         *dst_length= length;
1404         *consumed= length+1; //+1 for the header
// bufidx 1 keeps partition-C data from clobbering the main escape buffer
1408     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410     dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419         //remove escapes (very rare 1:2^22)
1420         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421             if(src[si+2]==3){ //escape
1426             }else //next start code
1430         dst[di++]= src[si++];
1434     *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440  * identifies the exact end of the bitstream
1441  * @return the length of the trailing bits (rbsp_stop_one_bit plus padding), or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1457  * IDCT transforms the 16 dc values and dequantizes them.
 * In-place 4x4 inverse Hadamard on the luma DC coefficients (which sit at
 * the DC position of each 4x4 block inside the 16x16 coefficient layout),
 * followed by dequantization with qmul. Butterflies only — no multiplies
 * until the final scaling, matching the spec's luma DC transform.
1458  * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1463     int temp[16]; //FIXME check if this is a good idea
// offsets of the 4 DC positions per row/column inside the 16x16 block array
1464     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
// horizontal pass: 4-point Hadamard butterflies into temp[]
1470         const int offset= y_offset[i];
1471         const int z0= block[offset+stride*0] + block[offset+stride*4];
1472         const int z1= block[offset+stride*0] - block[offset+stride*4];
1473         const int z2= block[offset+stride*1] - block[offset+stride*5];
1474         const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical pass + dequant: (x*qmul + 128) >> 8 rounds to nearest
1483         const int offset= x_offset[i];
1484         const int z0= temp[4*0+i] + temp[4*2+i];
1485         const int z1= temp[4*0+i] - temp[4*2+i];
1486         const int z2= temp[4*1+i] - temp[4*3+i];
1487         const int z3= temp[4*1+i] + temp[4*3+i];
1489         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1498  * DCT transforms the 16 dc values.
 * Forward counterpart of h264_luma_dc_dequant_idct_c (encoder side):
 * same in-place 4x4 Hadamard butterflies, with a final >>1 normalization
 * instead of dequantization.
1499  * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 //    const int qmul= dequant_coeff[qp][0];
1504     int temp[16]; //FIXME check if this is a good idea
1505     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// horizontal pass into temp[]
1509         const int offset= y_offset[i];
1510         const int z0= block[offset+stride*0] + block[offset+stride*4];
1511         const int z1= block[offset+stride*0] - block[offset+stride*4];
1512         const int z2= block[offset+stride*1] - block[offset+stride*5];
1513         const int z3= block[offset+stride*1] + block[offset+stride*5];
// vertical pass, write back with >>1 scaling
1522         const int offset= x_offset[i];
1523         const int z0= temp[4*0+i] + temp[4*2+i];
1524         const int z1= temp[4*0+i] - temp[4*2+i];
1525         const int z2= temp[4*1+i] - temp[4*3+i];
1526         const int z3= temp[4*1+i] + temp[4*3+i];
1528         block[stride*0 +offset]= (z0 + z3)>>1;
1529         block[stride*2 +offset]= (z1 + z2)>>1;
1530         block[stride*8 +offset]= (z1 - z2)>>1;
1531         block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, in place. The four DCs live at the DC positions of the
 * four 4x4 chroma blocks (hence stride 32 / xStride 16 into the block
 * array); the final >>7 folds in the transform's normalization.
 */
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540     const int stride= 16*2;
1541     const int xStride= 16;
1544     a= block[stride*0 + xStride*0];
1545     b= block[stride*0 + xStride*1];
1546     c= block[stride*1 + xStride*0];
1547     d= block[stride*1 + xStride*1];
1554     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC coefficients, in place
 * (encoder-side counterpart of chroma_dc_dequant_idct_c). Same layout:
 * the four DCs sit at stride 32 / xStride 16 in the coefficient array.
 */
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562     const int stride= 16*2;
1563     const int xStride= 16;
1566     a= block[stride*0 + xStride*0];
1567     b= block[stride*0 + xStride*1];
1568     c= block[stride*1 + xStride*0];
1569     d= block[stride*1 + xStride*1];
1576     block[stride*0 + xStride*0]= (a+c);
1577     block[stride*0 + xStride*1]= (e+b);
1578     block[stride*1 + xStride*0]= (a-c);
1579     block[stride*1 + xStride*1]= (e-b);
1584  * gets the chroma qp.
 * Simple table lookup: maps the luma qscale to the chroma QP for chroma
 * plane t (Cb/Cr have separate tables via pps.chroma_qp_index_offset).
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587     return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition from one reference picture (one
 * prediction direction). Computes quarter-pel luma / eighth-pel chroma
 * source positions from the cached MV, falls back to
 * ff_emulated_edge_mc() when the reference block lies (partially)
 * outside the picture, then runs the qpel/chroma MC functions.
 *
 * @param n        partition index into scan8[] (selects the cached MV)
 * @param square   nonzero: one square op; else two ops 'delta' bytes apart
 * @param qpix_op / chroma_op  put or avg variants depending on uni/bi pred
 */
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592                            int src_x_offset, int src_y_offset,
1593                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594     MpegEncContext * const s = &h->s;
// mv in quarter-pel units, offset to this partition's position
1595     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1596     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// luma_xy selects one of the 16 quarter-pel interpolation functions
1597     const int luma_xy= (mx&3) + ((my&3)<<2);
1598     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599     uint8_t * src_cb, * src_cr;
1600     int extra_width= h->emu_edge_width;
1601     int extra_height= h->emu_edge_height;
1603     const int full_mx= mx>>2;
1604     const int full_my= my>>2;
1605     const int pic_width  = 16*s->mb_width;
1606     const int pic_height = 16*s->mb_height >> MB_FIELD;
// subpel interpolation reads 3 extra pixels on either side; shrink the
// allowed range accordingly
1608     if(mx&7) extra_width -= 3;
1609     if(my&7) extra_height -= 3;
1611     if(   full_mx < 0-extra_width
1612        || full_my < 0-extra_height
1613        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1614        || full_my + 16/*FIXME*/ > pic_height + extra_height){
1615         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1616             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1620     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1622         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
// grayscale decoding: skip chroma entirely
1625     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1628         // chroma offset when predicting from a field of opposite parity
1629         my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1630         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1632     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1633     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637             src_cb= s->edge_emu_buffer;
1639     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1642         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1643             src_cr= s->edge_emu_buffer;
1645     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Non-weighted motion compensation for one partition: does a 'put' from
 * the list0 reference, and when the partition is bi-predicted switches
 * the ops to 'avg' so the list1 pass averages into the same destination.
 */
1648 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1649                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1650                            int x_offset, int y_offset,
1651                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1652                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1653                            int list0, int list1){
1654     MpegEncContext * const s = &h->s;
1655     qpel_mc_func *qpix_op=  qpix_put;
1656     h264_chroma_mc_func chroma_op= chroma_put;
// advance destinations to the partition and convert the offsets to
// picture coordinates (x in chroma units, y adjusted for field MBs)
1658     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1659     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1660     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1661     x_offset += 8*s->mb_x;
1662     y_offset += 8*(s->mb_y >> MB_FIELD);
1665         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1666         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1667                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1668                    qpix_op, chroma_op);
// second (list1) pass must average with the list0 result
1671         chroma_op= chroma_avg;
1675         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1676         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1677                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1678                    qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-directional case: both directions are MC'd separately (list1 into the
 * obmc scratchpad), then blended with either implicit weights
 * (h->use_weight == 2) or explicit per-ref luma/chroma weights+offsets.
 * Uni-directional case: one MC pass followed by an in-place weight op.
 */
1682 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1683                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1684                            int x_offset, int y_offset,
1685                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1686                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1687                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1688                            int list0, int list1){
1689     MpegEncContext * const s = &h->s;
1691     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1692     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1693     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1694     x_offset += 8*s->mb_x;
1695     y_offset += 8*(s->mb_y >> MB_FIELD);
1698         /* don't optimize for luma-only case, since B-frames usually
1699          * use implicit weights => chroma too. */
// list1 prediction lands in scratch buffers so it can be blended below
1700         uint8_t *tmp_cb = s->obmc_scratchpad;
1701         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1702         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1703         int refn0 = h->ref_cache[0][ scan8[n] ];
1704         int refn1 = h->ref_cache[1][ scan8[n] ];
1706         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1707                     dest_y, dest_cb, dest_cr,
1708                     x_offset, y_offset, qpix_put, chroma_put);
1709         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1710                     tmp_y, tmp_cb, tmp_cr,
1711                     x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights sum to 64, log2_denom fixed at 5
1713         if(h->use_weight == 2){
1714             int weight0 = h->implicit_weight[refn0][refn1];
1715             int weight1 = 64 - weight0;
1716             luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1717             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1718             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit weighting: per-list, per-ref weights and offsets from the slice header
1720             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1721                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1722                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1723             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1724                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1725                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1726             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1728                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: MC then apply explicit weight in place
1731         int list = list1 ? 1 : 0;
1732         int refn  = h->ref_cache[list][ scan8[n] ];
1733         Picture *ref= &h->ref_list[list][refn];
1734         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1735                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1736                     qpix_put, chroma_put);
1738         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1739                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1740         if(h->use_weight_chroma){
1741             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1742                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1743             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or the
 * standard path. Weighted is needed for explicit WP (use_weight==1) and
 * for implicit bi-pred whose weight pair is not the trivial 32/32
 * (a 32/32 implicit blend equals a plain average, so std MC suffices).
 */
1749 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1750                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1751                            int x_offset, int y_offset,
1752                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1753                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1754                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1755                            int list0, int list1){
1756     if((h->use_weight==2 && list0 && list1
1757         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1758        || h->use_weight==1)
1759         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1760                          x_offset, y_offset, qpix_put, chroma_put,
1761                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1763         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1764                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1767 static inline void prefetch_motion(H264Context *h, int list){
1768     /* fetch pixels for estimated mv 4 macroblocks ahead
1769      * optimized for 64byte cache lines */
1770     MpegEncContext * const s = &h->s;
1771     const int refn = h->ref_cache[list][scan8[0]];
// estimate where MC will read 4 MBs from now (+16*mb_x ahead, +8 fudge)
1773         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1774         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1775         uint8_t **src= h->ref_list[list][refn].data;
1776         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1777         s->dsp.prefetch(src[0]+off, s->linesize, 4);
// cb and cr planes are assumed contiguous: src[2]-src[1] is the plane stride
1778         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1779         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level inter prediction for one macroblock: walks the MB partition
 * tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and issues
 * mc_part() for each partition with the matching qpel/chroma function
 * size index ([0]=16-wide, [1]=8, [2]=4) and weight-op index.
 * Prefetches the list0 ref at entry and the list1 ref at exit.
 */
1783 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1784                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1785                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1786                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1787     MpegEncContext * const s = &h->s;
1788     const int mb_xy= h->mb_xy;
1789     const int mb_type= s->current_picture.mb_type[mb_xy];
1791     assert(IS_INTER(mb_type));
1793     prefetch_motion(h, 0);
1795     if(IS_16X16(mb_type)){
1796         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1797                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1798                 &weight_op[0], &weight_avg[0],
1799                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1800     }else if(IS_16X8(mb_type)){
1801         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1802                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1803                 &weight_op[1], &weight_avg[1],
1804                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1805         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1806                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1807                 &weight_op[1], &weight_avg[1],
1808                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1809     }else if(IS_8X16(mb_type)){
// delta = 8*mb_linesize: the two halves of an 8x16 op are stacked vertically
1810         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1811                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1812                 &weight_op[2], &weight_avg[2],
1813                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1814         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1815                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1816                 &weight_op[2], &weight_avg[2],
1817                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821         assert(IS_8X8(mb_type));
// 8x8 mode: each quadrant carries its own sub_mb_type and direction flags
1824             const int sub_mb_type= h->sub_mb_type[i];
1826             int x_offset= (i&1)<<2;
1827             int y_offset= (i&2)<<1;
1829             if(IS_SUB_8X8(sub_mb_type)){
1830                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1831                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832                     &weight_op[3], &weight_avg[3],
1833                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1834             }else if(IS_SUB_8X4(sub_mb_type)){
1835                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1836                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1837                     &weight_op[4], &weight_avg[4],
1838                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1839                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1840                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1841                     &weight_op[4], &weight_avg[4],
1842                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1843             }else if(IS_SUB_4X8(sub_mb_type)){
1844                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1845                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1846                     &weight_op[5], &weight_avg[5],
1847                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1848                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1849                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1850                     &weight_op[5], &weight_avg[5],
1851                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854                 assert(IS_SUB_4X4(sub_mb_type));
1856                     int sub_x_offset= x_offset + 2*(j&1);
1857                     int sub_y_offset= y_offset +   (j&2);
1858                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1859                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1860                         &weight_op[6], &weight_avg[6],
1861                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867     prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants). Each VLC is pointed at its
 * statically allocated table (the file-scope *_vlc_table arrays) and
 * built with INIT_VLC_USE_NEW_STATIC so no heap allocation occurs.
 * Guarded by the 'done' flag; NOTE(review): not thread-safe by itself —
 * presumably callers serialize the first call, confirm in the full file.
 */
1870 static av_cold void decode_init_vlc(void){
1871     static int done = 0;
1878         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1879         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1880         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1881                  &chroma_dc_coeff_token_len [0], 1, 1,
1882                  &chroma_dc_coeff_token_bits[0], 1, 1,
1883                  INIT_VLC_USE_NEW_STATIC);
// the four coeff_token tables share one packed array; 'offset' walks it
1887             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1888             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1889             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1890                      &coeff_token_len [i][0], 1, 1,
1891                      &coeff_token_bits[i][0], 1, 1,
1892                      INIT_VLC_USE_NEW_STATIC);
1893             offset += coeff_token_vlc_tables_size[i];
1896          * This is a one time safety check to make sure that
1897          * the packed static coeff_token_vlc table sizes
1898          * were initialized correctly.
1900         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1903             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1904             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1905             init_vlc(&chroma_dc_total_zeros_vlc[i],
1906                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1907                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1908                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1909                      INIT_VLC_USE_NEW_STATIC);
// one total_zeros table per possible total_coeff value (1..15)
1911         for(i=0; i<15; i++){
1912             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1913             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1914             init_vlc(&total_zeros_vlc[i],
1915                      TOTAL_ZEROS_VLC_BITS, 16,
1916                      &total_zeros_len [i][0], 1, 1,
1917                      &total_zeros_bits[i][0], 1, 1,
1918                      INIT_VLC_USE_NEW_STATIC);
1922             run_vlc[i].table = run_vlc_tables[i];
1923             run_vlc[i].table_allocated = run_vlc_tables_size;
1924             init_vlc(&run_vlc[i],
1926                      &run_len [i][0], 1, 1,
1927                      &run_bits[i][0], 1, 1,
1928                      INIT_VLC_USE_NEW_STATIC);
// run7_vlc covers the zeros-left >= 7 case, separate from run_vlc[0..5]
1930         run7_vlc.table = run7_vlc_table,
1931         run7_vlc.table_allocated = run7_vlc_table_size;
1932         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1933                  &run_len [6][0], 1, 1,
1934                  &run_bits[6][0], 1, 1,
1935                  INIT_VLC_USE_NEW_STATIC);
/* Releases all per-decoder tables allocated by alloc_tables()/context_init(),
 * plus per-thread scratch buffers for every slice-thread context.
 * av_freep() both frees and NULLs each pointer, so repeated calls are safe. */
1939 static void free_tables(H264Context *h){
1942     av_freep(&h->intra4x4_pred_mode);
1943     av_freep(&h->chroma_pred_mode_table);
1944     av_freep(&h->cbp_table);
1945     av_freep(&h->mvd_table[0]);
1946     av_freep(&h->mvd_table[1]);
1947     av_freep(&h->direct_table);
1948     av_freep(&h->non_zero_count);
1949     av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables), so it
 * must be cleared once the base allocation is gone. */
1950     h->slice_table= NULL;
1952     av_freep(&h->mb2b_xy);
1953     av_freep(&h->mb2b8_xy);
1955     for(i = 0; i < h->s.avctx->thread_count; i++) {
1956         hx = h->thread_context[i];
1958         av_freep(&hx->top_borders[1]);
1959         av_freep(&hx->top_borders[0]);
1960         av_freep(&hx->s.obmc_scratchpad);
/* Builds the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical,
 * table 1 aliases table 0 to save work. Coefficients are stored
 * transposed when the platform IDCT is not the C reference one. */
1964 static void init_dequant8_coeff_table(H264Context *h){
1966     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1967     h->dequant8_coeff[0] = h->dequant8_buffer[0];
1968     h->dequant8_coeff[1] = h->dequant8_buffer[1];
1970     for(i=0; i<2; i++ ){
/* Share the buffer when inter matrix == intra matrix. */
1971         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1972             h->dequant8_coeff[1] = h->dequant8_buffer[0];
1976         for(q=0; q<52; q++){
1977             int shift = div6[q];
1980                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1981                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1982                     h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the 4x4 dequantization tables (6 matrices: intra/inter for
 * Y/Cb/Cr) for all 52 QP values from the PPS scaling matrices.
 * Identical scaling matrices share one buffer; coefficients are
 * transposed when a non-reference (SIMD) IDCT is in use. */
1987 static void init_dequant4_coeff_table(H264Context *h){
1989     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1990     for(i=0; i<6; i++ ){
1991         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* Reuse an earlier buffer when matrix j matches matrix i. */
1993             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1994                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2001         for(q=0; q<52; q++){
2002             int shift = div6[q] + 2;
2005                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2006                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2007                     h->pps.scaling_matrix4[i][x]) << shift;
/* Initializes 4x4 (and, when enabled, 8x8) dequant tables.
 * For lossless (transform_bypass) streams the QP-0 row is forced to a
 * flat 1<<6 scale so dequantization becomes the identity. */
2012 static void init_dequant_tables(H264Context *h){
2014     init_dequant4_coeff_table(h);
2015     if(h->pps.transform_8x8_mode)
2016         init_dequant8_coeff_table(h);
2017     if(h->sps.transform_bypass){
2020                 h->dequant4_coeff[i][0][x] = 1<<6;
2021         if(h->pps.transform_8x8_mode)
2024                     h->dequant8_coeff[i][0][x] = 1<<6;
2031 * needs width/height
/* Allocates all per-picture-size decoder tables (prediction modes,
 * CBP, MVD, slice map, mb->block index maps). Requires mb_width/
 * mb_height to be known. CHECKED_ALLOCZ jumps to a cleanup label on
 * failure (label outside this extract). Returns 0 on success. */
2033 static int alloc_tables(H264Context *h){
2034     MpegEncContext * const s = &h->s;
/* +1 row of macroblocks so edge MBs can read "above" neighbours. */
2035     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2038     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2040     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2041     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2042     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2044     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2045     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2046     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2047     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so index 0 is the first
 * real macroblock, with a guard band above/left. */
2049     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2050     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2052     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2053     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* Precompute macroblock index -> 4x4-block / 8x8-block index maps. */
2054     for(y=0; y<s->mb_height; y++){
2055         for(x=0; x<s->mb_width; x++){
2056             const int mb_xy= x + y*s->mb_stride;
2057             const int b_xy = 4*x + 4*y*h->b_stride;
2058             const int b8_xy= 2*x + 2*y*h->b8_stride;
2060             h->mb2b_xy [mb_xy]= b_xy;
2061             h->mb2b8_xy[mb_xy]= b8_xy;
2065     s->obmc_scratchpad = NULL;
2067     if(!h->dequant4_coeff[0])
2068         init_dequant_tables(h);
2077 * Mimic alloc_tables(), but for every context thread.
/* Shares (does not copy) the tables allocated by alloc_tables() with a
 * secondary slice-thread context. The scratchpad is per-thread and is
 * therefore reset rather than shared; frame_start() allocates it. */
2079 static void clone_tables(H264Context *dst, H264Context *src){
2080     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2081     dst->non_zero_count           = src->non_zero_count;
2082     dst->slice_table              = src->slice_table;
2083     dst->cbp_table                = src->cbp_table;
2084     dst->mb2b_xy                  = src->mb2b_xy;
2085     dst->mb2b8_xy                 = src->mb2b8_xy;
2086     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2087     dst->mvd_table[0]             = src->mvd_table[0];
2088     dst->mvd_table[1]             = src->mvd_table[1];
2089     dst->direct_table             = src->direct_table;
2091     dst->s.obmc_scratchpad = NULL;
2092     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2097 * Allocate buffers which are not shared amongst multiple threads.
/* Allocates per-thread buffers (top borders for luma + both chroma
 * planes: 16+8+8 bytes per macroblock column). Returns 0 on success;
 * the CHECKED_ALLOCZ fail path falls through to the -1 return below
 * and relies on free_tables() for cleanup. */
2099 static int context_init(H264Context *h){
2100     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2101     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2105     return -1; // free_tables will clean up for us
/* Initialization shared by the H.264 decoder and related codecs:
 * copies dimensions from the AVCodecContext, sets up intra prediction
 * function pointers, and seeds the PPS scaling matrices with the flat
 * default value 16 (no scaling). */
2108 static av_cold void common_init(H264Context *h){
2109     MpegEncContext * const s = &h->s;
2111     s->width = s->avctx->width;
2112     s->height = s->avctx->height;
2113     s->codec_id= s->avctx->codec->id;
2115     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 == "no PPS dequant tables built yet". */
2117     h->dequant_coeff_pps= -1;
2118     s->unrestricted_mv=1;
2119     s->decode=1; //FIXME
2121     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2122     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init callback: sets MpegEncContext defaults, selects the
 * output pixel format (full-range YUVJ420P for SVQ3, YUV420P for
 * H.264), and detects AVC ("avcC") extradata by its leading 0x01 byte.
 * NOTE(review): extract has gaps — common_init()/return paths are not
 * visible here. */
2125 static av_cold int decode_init(AVCodecContext *avctx){
2126     H264Context *h= avctx->priv_data;
2127     MpegEncContext * const s = &h->s;
2129     MPV_decode_defaults(s);
2134     s->out_format = FMT_H264;
2135     s->workaround_bugs= avctx->workaround_bugs;
2138 //    s->decode_mb= ff_h263_decode_mb;
2139     s->quarter_sample = 1;
2142     if(avctx->codec_id == CODEC_ID_SVQ3)
2143         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2145         avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata (MP4-style) starts with configurationVersion == 1. */
2149     if(avctx->extradata_size > 0 && avctx->extradata &&
2150        *(char *)avctx->extradata == 1){
2157     h->thread_context[0] = h;
2158     h->outputed_poc = INT_MIN;
/* Sentinel so the first real poc_msb comparison cannot match. */
2159     h->prev_poc_msb= 1<<16;
/* Per-frame setup: starts the MPV frame and error resilience, computes
 * the block_offset table for frame and field (MBAFF) addressing,
 * allocates the per-thread bipred scratchpad, and resets slice/POC
 * bookkeeping on the new picture. */
2163 static int frame_start(H264Context *h){
2164     MpegEncContext * const s = &h->s;
2167     if(MPV_frame_start(s, s->avctx) < 0)
2169     ff_er_frame_start(s);
2171      * MPV_frame_start uses pict_type to derive key_frame.
2172      * This is incorrect for H.264; IDR markings must be used.
2173      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2174      * See decode_nal_units().
2176     s->current_picture_ptr->key_frame= 0;
2178     assert(s->linesize && s->uvlinesize);
/* block_offset[0..15]: luma 4x4 blocks, frame addressing;
 * block_offset[24..39]: same blocks with doubled (field) line stride. */
2180     for(i=0; i<16; i++){
2181         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2182         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* Chroma block offsets, frame (16/20) and field (24+16/24+20) variants. */
2185         h->block_offset[16+i]=
2186         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2187         h->block_offset[24+16+i]=
2188         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191     /* can't be in alloc_tables because linesize isn't known there.
2192      * FIXME: redo bipred weight to not require extra buffer? */
2193     for(i = 0; i < s->avctx->thread_count; i++)
2194         if(!h->thread_context[i]->s.obmc_scratchpad)
2195             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2197     /* some macroblocks will be accessed before they're available */
2198     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2199         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2201 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2203     // We mark the current picture as non-reference after allocating it, so
2204     // that if we break out due to an error it can be released automatically
2205     // in the next MPV_frame_start().
2206     // SVQ3 as well as most other codecs have only last/next/current and thus
2207     // get released even with set reference, besides SVQ3 and others do not
2208     // mark frames as reference later "naturally".
2209     if(s->codec_id != CODEC_ID_SVQ3)
2210         s->current_picture_ptr->reference= 0;
2212     s->current_picture_ptr->field_poc[0]=
2213     s->current_picture_ptr->field_poc[1]= INT_MAX;
2214     assert(s->current_picture_ptr->long_ref==0);
/* Saves the bottom row (into top_borders, for the MB below) and the
 * right column (into left_border, for the MB to the right) of the
 * just-decoded macroblock, so the deblocking filter can later restore
 * the pre-filter samples. MBAFF frames keep two saved rows (one per
 * field) — hence the offset/step/top_idx selection.
 * NOTE(review): extract has gaps; several brace/else lines are not
 * visible, so the exact branch structure must be checked in full source. */
2219 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2220     MpegEncContext * const s = &h->s;
2229     src_cb -= uvlinesize;
2230     src_cr -= uvlinesize;
2232     if(!simple && FRAME_MBAFF){
2234             offset  = MB_MBAFF ? 1 : 17;
2235             uvoffset= MB_MBAFF ? 1 : 9;
2237                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2238                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2239                 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2240                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2241                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2246             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2247             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2248                 h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2249                 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2255             top_idx = MB_MBAFF ? 0 : 1;
2257         step= MB_MBAFF ? 2 : 1;
2260     // There are two lines saved, the line above the the top macroblock of a pair,
2261     // and the line above the bottom macroblock
2262     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2263     for(i=1; i<17 - skiplast; i++){
2264         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2267     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2268     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2270     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2271         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2272         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2273         for(i=1; i<9 - skiplast; i++){
2274             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2275             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2277         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2278         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swaps (xchg!=0) or copies the saved unfiltered border samples with
 * the current macroblock edges, so intra prediction sees unfiltered
 * neighbours while the deblocked picture keeps the filtered ones.
 * deblock_left/top gate the swap at slice boundaries when the filter
 * runs per-slice (deblocking_filter == 2).
 * NOTE(review): extract has gaps; XCHG macro body and some braces are
 * outside this view. */
2282 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2283     MpegEncContext * const s = &h->s;
2294     if(!simple && FRAME_MBAFF){
2296             offset  = MB_MBAFF ? 1 : 17;
2297             uvoffset= MB_MBAFF ? 1 : 9;
2301             top_idx = MB_MBAFF ? 0 : 1;
2303         step= MB_MBAFF ? 2 : 1;
2306     if(h->deblocking_filter == 2) {
/* Filter only inside the current slice: compare slice ids of neighbours. */
2308         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2309         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2311         deblock_left = (s->mb_x > 0);
2312         deblock_top =  (s->mb_y > !!MB_FIELD);
2315     src_y  -=   linesize + 1;
2316     src_cb -= uvlinesize + 1;
2317     src_cr -= uvlinesize + 1;
2319 #define XCHG(a,b,t,xchg)\
2326             for(i = !deblock_top; i<16; i++){
2327                 XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2329             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2333         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2334         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2335         if(s->mb_x+1 < s->mb_width){
2336             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2340     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2342             for(i = !deblock_top; i<8; i++){
2343                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2344                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2346             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2347             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2350             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2351             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstructs one macroblock: destination pointer setup (with MBAFF
 * field addressing), intra prediction or inter motion compensation,
 * IDCT/add of luma and chroma residuals (incl. lossless bypass and
 * SVQ3 paths), then deblocking-filter border bookkeeping.
 * 'simple' (compile-time via av_always_inline) strips the MBAFF, gray,
 * PCM and SVQ3 paths for the common progressive H.264 case.
 * NOTE(review): extract has gaps; many closing braces / else lines are
 * outside this view — consult the full source before editing. */
2356 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2357     MpegEncContext * const s = &h->s;
2358     const int mb_x= s->mb_x;
2359     const int mb_y= s->mb_y;
2360     const int mb_xy= h->mb_xy;
2361     const int mb_type= s->current_picture.mb_type[mb_xy];
2362     uint8_t  *dest_y, *dest_cb, *dest_cr;
2363     int linesize, uvlinesize /*dct_offset*/;
2365     int *block_offset = &h->block_offset[0];
2366     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2367     const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2368     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2369     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2371     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2372     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2373     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2375     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2376     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* Field macroblock: double strides, use the field block_offset table,
 * and for bottom-field MBs rewind to the field's first line. */
2378     if (!simple && MB_FIELD) {
2379         linesize   = h->mb_linesize = s->linesize * 2;
2380         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2381         block_offset = &h->block_offset[24];
2382         if(mb_y&1){ //FIXME move out of this function?
2383             dest_y -= s->linesize*15;
2384             dest_cb-= s->uvlinesize*7;
2385             dest_cr-= s->uvlinesize*7;
2389         for(list=0; list<h->list_count; list++){
2390             if(!USES_LIST(mb_type, list))
2392             if(IS_16X16(mb_type)){
2393                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2394                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2396                 for(i=0; i<16; i+=4){
2397                     int ref = h->ref_cache[list][scan8[i]];
2399                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2405         linesize   = h->mb_linesize = s->linesize;
2406         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2407 //        dct_offset = s->linesize * 16;
/* IPCM: raw samples are stored in h->mb; just copy them out. */
2410     if (!simple && IS_INTRA_PCM(mb_type)) {
2411         for (i=0; i<16; i++) {
2412             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2414         for (i=0; i<8; i++) {
2415             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2416             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2419         if(IS_INTRA(mb_type)){
/* Swap in unfiltered neighbour samples before intra prediction. */
2420             if(h->deblocking_filter)
2421                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2423             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2424                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2425                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2428             if(IS_INTRA4x4(mb_type)){
2429                 if(simple || !s->encoding){
2430                     if(IS_8x8DCT(mb_type)){
2431                         if(transform_bypass){
2433                             idct_add = s->dsp.add_pixels8;
2435                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2436                             idct_add    = s->dsp.h264_idct8_add;
2438                         for(i=0; i<16; i+=4){
2439                             uint8_t * const ptr= dest_y + block_offset[i];
2440                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile_idc 244 (High 4:4:4): lossless V/H prediction is fused with
 * the residual add. */
2441                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2442                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2444                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2445                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2446                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2448                                     if(nnz == 1 && h->mb[i*16])
2449                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2451                                         idct_add   (ptr, h->mb + i*16, linesize);
2456                     if(transform_bypass){
2458                         idct_add = s->dsp.add_pixels4;
2460                         idct_dc_add = s->dsp.h264_idct_dc_add;
2461                         idct_add    = s->dsp.h264_idct_add;
2463                     for(i=0; i<16; i++){
2464                         uint8_t * const ptr= dest_y + block_offset[i];
2465                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2467                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2468                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* Down-left / vertical-left prediction needs top-right samples; when
 * unavailable, replicate the last available top pixel. */
2472                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2473                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2474                                 assert(mb_y || linesize <= block_offset[i]);
2475                                 if(!topright_avail){
2476                                     tr= ptr[3 - linesize]*0x01010101;
2477                                     topright= (uint8_t*) &tr;
2479                                     topright= ptr + 4 - linesize;
2483                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2484                             nnz = h->non_zero_count_cache[ scan8[i] ];
2487                                     if(nnz == 1 && h->mb[i*16])
2488                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2490                                         idct_add   (ptr, h->mb + i*16, linesize);
2492                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2499                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2501                     if(!transform_bypass)
2502                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2504                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2506             if(h->deblocking_filter)
2507                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* Inter macroblock: motion compensation for all partitions. */
2509             hl_motion(h, dest_y, dest_cb, dest_cr,
2510                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2511                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2512                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2516         if(!IS_INTRA4x4(mb_type)){
2518                 if(IS_INTRA16x16(mb_type)){
2519                     if(transform_bypass){
2520                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2521                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2523                             for(i=0; i<16; i++){
2524                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2525                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2529                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2531                 }else if(h->cbp&15){
2532                     if(transform_bypass){
2533                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2534                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2535                         for(i=0; i<16; i+=di){
2536                             if(h->non_zero_count_cache[ scan8[i] ]){
2537                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2541                         if(IS_8x8DCT(mb_type)){
2542                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2544                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2549                 for(i=0; i<16; i++){
2550                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2551                         uint8_t * const ptr= dest_y + block_offset[i];
2552                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* Chroma residuals (cbp bits 4-5). */
2558         if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2559             uint8_t *dest[2] = {dest_cb, dest_cr};
2560             if(transform_bypass){
2561                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2562                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2563                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2565                     idct_add = s->dsp.add_pixels4;
2566                     for(i=16; i<16+8; i++){
2567                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2568                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2572                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2573                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2575                     idct_add = s->dsp.h264_idct_add;
2576                     idct_dc_add = s->dsp.h264_idct_dc_add;
2577                     for(i=16; i<16+8; i++){
2578                         if(h->non_zero_count_cache[ scan8[i] ])
2579                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2580                         else if(h->mb[i*16])
2581                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584                     for(i=16; i<16+8; i++){
2585                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2586                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2587                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2594     if(h->deblocking_filter) {
2595         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2596         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2597         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2598         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2599         if (!simple && FRAME_MBAFF) {
2600             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2602             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2608 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal specialized (inlined) with simple=1,
 * compiling out MBAFF/gray/PCM/SVQ3 handling. */
2610 static void hl_decode_mb_simple(H264Context *h){
2611     hl_decode_mb_internal(h, 1);
2615 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: hl_decode_mb_internal with simple=0 (interlacing, PCM,
 * gray, SVQ3). av_noinline keeps the big body out of hl_decode_mb. */
2617 static void av_noinline hl_decode_mb_complex(H264Context *h){
2618     hl_decode_mb_internal(h, 0);
/* Dispatches macroblock reconstruction to the simple or complex
 * specialization based on stream/MB properties. ENABLE_SMALL builds
 * always take the complex (single-instantiation) path. */
2621 static void hl_decode_mb(H264Context *h){
2622     MpegEncContext * const s = &h->s;
2623     const int mb_xy= h->mb_xy;
2624     const int mb_type= s->current_picture.mb_type[mb_xy];
2625     int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* Encoder-side use may skip reconstruction entirely. */
2627     if(ENABLE_H264_ENCODER && !s->decode)
2631         hl_decode_mb_complex(h);
2632     else hl_decode_mb_simple(h);
/* Converts an in-place Picture from frame to single-field addressing:
 * doubles all linesizes, offsets data pointers by one line for the
 * bottom field, sets reference to the field parity and poc to the
 * corresponding field_poc. */
2635 static void pic_as_field(Picture *pic, const int parity){
2637     for (i = 0; i < 4; ++i) {
2638         if (parity == PICT_BOTTOM_FIELD)
2639             pic->data[i] += pic->linesize[i];
2640         pic->reference = parity;
2641         pic->linesize[i] *= 2;
2643     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copies src into dest when src is referenced with the requested
 * parity; for field parities the copy is converted via pic_as_field()
 * and id_add is folded into pic_id. Returns the match flag.
 * NOTE(review): the actual '*dest = *src' copy line is in an elided
 * part of this extract. */
2646 static int split_field_copy(Picture *dest, Picture *src,
2647                             int parity, int id_add){
2648     int match = !!(src->reference & parity);
2652         if(parity != PICT_FRAME){
2653             pic_as_field(dest, parity);
2655             dest->pic_id += id_add;
/* Builds a default reference list for field decoding by interleaving
 * same-parity (sel) and opposite-parity (sel^3) references from 'in',
 * per the H.264 field reference list construction rules. pic_id is
 * the long-term index or frame_num depending on is_long. Returns the
 * number of entries written (return in elided lines). */
2662 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2666     while(i[0]<len || i[1]<len){
2667         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2669         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2672             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2673             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2676             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2677             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sorts short-term references by POC into 'sorted':
 * dir==0 picks POCs below 'limit' in descending order, dir!=0 picks
 * POCs above it in ascending order (used for B-frame list0/list1).
 * Returns the number of pictures appended. */
2684 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2689     best_poc= dir ? INT_MIN : INT_MAX;
2691     for(i=0; i<len; i++){
2692         const int poc= src[i]->poc;
2693         if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2695             sorted[out_i]= src[i];
/* Sentinel unchanged => nothing matched this pass: done. */
2698         if(best_poc == (dir ? INT_MIN : INT_MAX))
2700         limit= sorted[out_i++]->poc - dir;
2706 * fills the default_ref_list.
/* Constructs the default (pre-reordering) reference picture lists.
 * B slices: short-term refs sorted by POC distance (opposite orders
 * for list0/list1) then long-term refs; identical lists get their
 * first two list1 entries swapped per the spec. P slices: short-term
 * in marking order, then long-term. */
2708 static int fill_default_ref_list(H264Context *h){
2709     MpegEncContext * const s = &h->s;
2712     if(h->slice_type_nos==FF_B_TYPE){
2713         Picture *sorted[32];
2718             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2720             cur_poc= s->current_picture_ptr->poc;
2722         for(list= 0; list<2; list++){
2723             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2724             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2726             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2727             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2730             if(len < h->ref_count[list])
2731                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* Spec: if both lists are identical and hold >1 entry, swap the first
 * two entries of list1. */
2735         if(lens[0] == lens[1] && lens[1] > 1){
2736             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2738                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2741         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2742         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2744         if(len < h->ref_count[0])
2745             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2748     for (i=0; i<h->ref_count[0]; i++) {
2749         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2751     if(h->slice_type_nos==FF_B_TYPE){
2752         for (i=0; i<h->ref_count[1]; i++) {
2753             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2760 static void print_short_term(H264Context *h);
2761 static void print_long_term(H264Context *h);
2764 * Extract structure information about the picture described by pic_num in
2765 * the current decoding context (frame or field). Note that pic_num is
2766 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2767 * @param pic_num picture number for which to extract structure information
2768 * @param structure one of PICT_XXX describing structure of picture
2770 * @return frame number (short term) or long term index of picture
2771 * described by pic_num
2773 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2774     MpegEncContext * const s = &h->s;
2776     *structure = s->picture_structure;
/* In field decoding, an even/odd pic_num distinction selects the same
 * or opposite field; the elided condition toggles accordingly. */
2779             /* opposite field */
2780             *structure ^= PICT_FRAME;
/* Parses ref_pic_list_reordering() from the slice header and applies
 * the explicit reordering commands to h->ref_list, starting from the
 * default lists. Handles short-term (idc 0/1: signed pic-num deltas)
 * and long-term (idc 2: index) reordering; idc 3 terminates the loop.
 * Returns 0 on success, -1 on bitstream errors.
 * NOTE(review): extract has gaps; several closing braces / return
 * statements are outside this view. */
2787 static int decode_ref_pic_list_reordering(H264Context *h){
2788     MpegEncContext * const s = &h->s;
2789     int list, index, pic_structure;
2791     print_short_term(h);
2794     for(list=0; list<h->list_count; list++){
2795         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2797         if(get_bits1(&s->gb)){
2798             int pred= h->curr_pic_num;
2800             for(index=0; ; index++){
2801                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2802                 unsigned int pic_id;
2804                 Picture *ref = NULL;
2806                 if(reordering_of_pic_nums_idc==3)
2809                 if(index >= h->ref_count[list]){
2810                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2814                 if(reordering_of_pic_nums_idc<3){
2815                     if(reordering_of_pic_nums_idc<2){
2816                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2819                         if(abs_diff_pic_num > h->max_pic_num){
2820                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; wrap into [0, max_pic_num). */
2824                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2825                         else                                pred+= abs_diff_pic_num;
2826                         pred &= h->max_pic_num - 1;
2828                         frame_num = pic_num_extract(h, pred, &pic_structure);
2830                         for(i= h->short_ref_count-1; i>=0; i--){
2831                             ref = h->short_ref[i];
2832                             assert(ref->reference);
2833                             assert(!ref->long_ref);
2835                                 ref->frame_num == frame_num &&
2836                                 (ref->reference & pic_structure)
2844                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2846                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2849                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2852                         ref = h->long_ref[long_idx];
2853                         assert(!(ref && !ref->reference));
2854                         if(ref && (ref->reference & pic_structure)){
2855                             ref->pic_id= pic_id;
2856                             assert(ref->long_ref);
2864                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2865                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* Shift the list down to insert the reordered ref at 'index'. */
2867                         for(i=index; i+1<h->ref_count[list]; i++){
2868                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2871                         for(; i > index; i--){
2872                             h->ref_list[list][i]= h->ref_list[list][i-1];
2874                         h->ref_list[list][index]= *ref;
2876                             pic_as_field(&h->ref_list[list][index], pic_structure);
2880                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2886     for(list=0; list<h->list_count; list++){
2887         for(index= 0; index < h->ref_count[list]; index++){
2888             if(!h->ref_list[list][index].data[0]){
2889                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2890                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF frames, derives per-field reference entries (stored at
 * ref_list[list][16+2*i] / [16+2*i+1]) from each frame reference, and
 * duplicates the corresponding explicit and implicit prediction
 * weights for the field entries. */
2898 static void fill_mbaff_ref_list(H264Context *h){
2900     for(list=0; list<2; list++){ //FIXME try list_count
2901         for(i=0; i<h->ref_count[list]; i++){
2902             Picture *frame = &h->ref_list[list][i];
2903             Picture *field = &h->ref_list[list][16+2*i];
2906                 field[0].linesize[j] <<= 1;
2907             field[0].reference = PICT_TOP_FIELD;
2908             field[0].poc= field[0].field_poc[0];
2909             field[1] = field[0];
/* Bottom field: shift data pointers down by one (frame) line. */
2911                 field[1].data[j] += frame->linesize[j];
2912             field[1].reference = PICT_BOTTOM_FIELD;
2913             field[1].poc= field[1].field_poc[1];
2915             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2916             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2918                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2919                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2923     for(j=0; j<h->ref_count[1]; j++){
2924         for(i=0; i<h->ref_count[0]; i++)
2925             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2926         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2927         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parses pred_weight_table() from the slice header: explicit weighted
 * prediction denominators plus per-reference luma/chroma weights and
 * offsets for both lists (list1 only for B slices). Absent per-ref
 * flags fall back to the default weight (1<<denom) and offset 0.
 * NOTE(review): extract has gaps; use_weight accumulation lines are
 * partly elided. */
2931 static int pred_weight_table(H264Context *h){
2932     MpegEncContext * const s = &h->s;
2934     int luma_def, chroma_def;
2937     h->use_weight_chroma= 0;
2938     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2939     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2940     luma_def = 1<<h->luma_log2_weight_denom;
2941     chroma_def = 1<<h->chroma_log2_weight_denom;
2943     for(list=0; list<2; list++){
2944         for(i=0; i<h->ref_count[list]; i++){
2945             int luma_weight_flag, chroma_weight_flag;
2947             luma_weight_flag= get_bits1(&s->gb);
2948             if(luma_weight_flag){
2949                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2950                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2951                 if(   h->luma_weight[list][i] != luma_def
2952                    || h->luma_offset[list][i] != 0)
2955                 h->luma_weight[list][i]= luma_def;
2956                 h->luma_offset[list][i]= 0;
2960                 chroma_weight_flag= get_bits1(&s->gb);
2961                 if(chroma_weight_flag){
2964                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2965                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2966                         if(   h->chroma_weight[list][i][j] != chroma_def
2967                            || h->chroma_offset[list][i][j] != 0)
2968                             h->use_weight_chroma= 1;
2973                         h->chroma_weight[list][i][j]= chroma_def;
2974                         h->chroma_offset[list][i][j]= 0;
/* Only B slices carry weights for list1. */
2979         if(h->slice_type_nos != FF_B_TYPE) break;
2981     h->use_weight= h->use_weight || h->use_weight_chroma;
/* Computes implicit (POC-distance based) bi-prediction weights per
 * H.264 8.4.2.3.2: weight = 64 - dist_scale_factor, clamped to 32 when
 * the scale factor leaves [-64, 128] or td is 0 (elided branch).
 * Shortcut: a single symmetric reference pair needs no weighting. */
2985 static void implicit_weight_table(H264Context *h){
2986     MpegEncContext * const s = &h->s;
2988     int cur_poc = s->current_picture_ptr->poc;
2990     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
2991        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2993         h->use_weight_chroma= 0;
2998     h->use_weight_chroma= 2;
2999     h->luma_log2_weight_denom= 5;
3000     h->chroma_log2_weight_denom= 5;
3002     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3003         int poc0 = h->ref_list[0][ref0].poc;
3004         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3005             int poc1 = h->ref_list[1][ref1].poc;
3006             int td = av_clip(poc1 - poc0, -128, 127);
3008                 int tb = av_clip(cur_poc - poc0, -128, 127);
3009                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3010                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3011                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3012                     h->implicit_weight[ref0][ref1] = 32;
3014                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3016                 h->implicit_weight[ref0][ref1] = 32;
3022 * Mark a picture as no longer needed for reference. The refmask
3023 * argument allows unreferencing of individual fields or the whole frame.
3024 * If the picture becomes entirely unreferenced, but is being held for
3025 * display purposes, it is marked as such.
3026 * @param refmask mask of fields to unreference; the mask is bitwise
3027 * anded with the reference marking of pic
3028 * @return non-zero if pic becomes entirely unreferenced (except possibly
3029 * for display purposes) zero if one of the fields remains in
/* See the block comment above: clears refmask bits from pic->reference;
 * if the picture is still pending output it is retagged DELAYED_PIC_REF
 * instead of being fully released. Returns nonzero only when no field
 * of the picture remains referenced (return statements elided here). */
3032 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3034 if (pic->reference &= refmask) {
3037 for(i = 0; h->delayed_pic[i]; i++)
3038 if(pic == h->delayed_pic[i]){
/* held for display only, not for prediction */
3039 pic->reference=DELAYED_PIC_REF;
3047 * instantaneous decoder refresh.
/**
 * Handles an IDR (instantaneous decoder refresh): drops every long-term
 * and short-term reference and resets the frame_num prediction state,
 * as required before decoding an IDR slice.
 */
3049 static void idr(H264Context *h){
3052 for(i=0; i<16; i++){
3053 remove_long(h, i, 0);
3055 assert(h->long_ref_count==0);
3057 for(i=0; i<h->short_ref_count; i++){
/* refmask 0 fully unreferences; picture may still be kept for display */
3058 unreference_pic(h, h->short_ref[i], 0);
3059 h->short_ref[i]= NULL;
3061 h->short_ref_count=0;
3062 h->prev_frame_num= 0;
3063 h->prev_frame_num_offset= 0;
3068 /* forget old pics after a seek */
/* forget old pics after a seek */
3069 static void flush_dpb(AVCodecContext *avctx){
3070 H264Context *h= avctx->priv_data;
/* drop everything queued for delayed output */
3072 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3073 if(h->delayed_pic[i])
3074 h->delayed_pic[i]->reference= 0;
3075 h->delayed_pic[i]= NULL;
/* INT_MIN so the next output POC comparison always passes */
3077 h->outputed_poc= INT_MIN;
3079 if(h->s.current_picture_ptr)
3080 h->s.current_picture_ptr->reference= 0;
3081 h->s.first_field= 0;
/* also flush the generic MPEG-layer picture state */
3082 ff_mpeg_flush(avctx);
3086 * Find a Picture in the short term reference list by frame number.
3087 * @param frame_num frame number to search for
3088 * @param idx the index into h->short_ref where returned picture is found
3089 * undefined if no picture found.
3090 * @return pointer to the found picture, or NULL if no pic with the provided
3091 * frame number is found
/* Linear scan of h->short_ref (see contract in the comment above);
 * the return statements are elided from this listing. */
3093 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3094 MpegEncContext * const s = &h->s;
3097 for(i=0; i<h->short_ref_count; i++){
3098 Picture *pic= h->short_ref[i];
3099 if(s->avctx->debug&FF_DEBUG_MMCO)
3100 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3101 if(pic->frame_num == frame_num) {
3110 * Remove a picture from the short term reference list by its index in
3111 * that list. This does no checking on the provided index; it is assumed
3112 * to be valid. Other list entries are shifted down.
3113 * @param i index into h->short_ref of picture to remove.
/* Unlinks entry i from h->short_ref and closes the gap with memmove;
 * no bounds checking beyond the assert (caller guarantees validity). */
3115 static void remove_short_at_index(H264Context *h, int i){
3116 assert(i >= 0 && i < h->short_ref_count);
3117 h->short_ref[i]= NULL;
/* decrement first: (short_ref_count - i) is then the tail length */
3118 if (--h->short_ref_count)
3119 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3124 * @return the removed picture or NULL if an error occurs
/* Finds frame_num in the short-term list, unreferences the fields given
 * by ref_mask, and removes the list entry only when the picture became
 * fully unreferenced. Returns the picture (return elided in listing). */
3126 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3127 MpegEncContext * const s = &h->s;
3131 if(s->avctx->debug&FF_DEBUG_MMCO)
3132 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3134 pic = find_short(h, frame_num, &i);
/* NULL-check of pic is elided here in the listing */
3136 if(unreference_pic(h, pic, ref_mask))
3137 remove_short_at_index(h, i);
3144 * Remove a picture from the long term reference list by its index in
3146 * @return the removed picture or NULL if an error occurs
/* Long-term counterpart of remove_short: drops slot i from h->long_ref
 * when the unreference (by ref_mask) leaves nothing referenced. */
3148 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3151 pic= h->long_ref[i];
/* NULL-check of pic elided in this listing */
3153 if(unreference_pic(h, pic, ref_mask)){
3154 assert(h->long_ref[i]->long_ref == 1);
3155 h->long_ref[i]->long_ref= 0;
3156 h->long_ref[i]= NULL;
3157 h->long_ref_count--;
3165 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3167 static void print_short_term(H264Context *h) {
3169 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3170 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3171 for(i=0; i<h->short_ref_count; i++){
3172 Picture *pic= h->short_ref[i];
3173 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3179 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is on
 * (the NULL-slot skip between lines 3186 and 3188 is elided here). */
3181 static void print_long_term(H264Context *h) {
3183 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3184 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3185 for(i = 0; i < 16; i++){
3186 Picture *pic= h->long_ref[i];
3188 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3195 * Executes the reference picture marking (memory management control operations).
/**
 * Executes the reference picture marking (memory management control operations).
 * Applies each MMCO opcode (H.264 clause 8.2.5.4) to the short/long-term
 * reference lists, then performs sliding-window marking of the current
 * picture and enforces sps.ref_frame_count as an overflow guard.
 * NOTE(review): heavily elided listing — MMCO_LONG/MMCO_RESET case labels,
 * break statements and several closing braces are not visible here.
 */
3197 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3198 MpegEncContext * const s = &h->s;
3200 int current_ref_assigned=0;
3203 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3204 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3206 for(i=0; i<mmco_count; i++){
3207 int structure, frame_num;
3208 if(s->avctx->debug&FF_DEBUG_MMCO)
3209 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3211 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3212 || mmco[i].opcode == MMCO_SHORT2LONG){
/* resolve the field/frame pic_num into a frame_num + structure */
3213 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3214 pic = find_short(h, frame_num, &j);
3216 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3217 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3218 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3223 switch(mmco[i].opcode){
3224 case MMCO_SHORT2UNUSED:
3225 if(s->avctx->debug&FF_DEBUG_MMCO)
3226 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the complementary field(s) */
3227 remove_short(h, frame_num, structure ^ PICT_FRAME);
3229 case MMCO_SHORT2LONG:
3230 if (h->long_ref[mmco[i].long_arg] != pic)
3231 remove_long(h, mmco[i].long_arg, 0);
3233 remove_short_at_index(h, j);
3234 h->long_ref[ mmco[i].long_arg ]= pic;
3235 if (h->long_ref[ mmco[i].long_arg ]){
3236 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3237 h->long_ref_count++;
3240 case MMCO_LONG2UNUSED:
3241 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3242 pic = h->long_ref[j];
3244 remove_long(h, j, structure ^ PICT_FRAME);
3245 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3246 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3249 // Comment below left from previous code as it is an interesting note.
3250 /* First field in pair is in short term list or
3251 * at a different long term index.
3252 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3253 * Report the problem and keep the pair where it is,
3254 * and mark this field valid.
/* MMCO_LONG case label elided: assign current picture to a long slot */
3257 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3258 remove_long(h, mmco[i].long_arg, 0);
3260 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3261 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3262 h->long_ref_count++;
3265 s->current_picture_ptr->reference |= s->picture_structure;
3266 current_ref_assigned=1;
3268 case MMCO_SET_MAX_LONG:
3269 assert(mmco[i].long_arg <= 16);
3270 // just remove the long term which index is greater than new max
3271 for(j = mmco[i].long_arg; j<16; j++){
3272 remove_long(h, j, 0);
/* MMCO_RESET case elided: clears both lists and resets POC/frame_num */
3276 while(h->short_ref_count){
3277 remove_short(h, h->short_ref[0]->frame_num, 0);
3279 for(j = 0; j < 16; j++) {
3280 remove_long(h, j, 0);
3282 s->current_picture_ptr->poc=
3283 s->current_picture_ptr->field_poc[0]=
3284 s->current_picture_ptr->field_poc[1]=
3288 s->current_picture_ptr->frame_num= 0;
/* --- sliding-window marking of the current picture --- */
3294 if (!current_ref_assigned) {
3295 /* Second field of complementary field pair; the first field of
3296 * which is already referenced. If short referenced, it
3297 * should be first entry in short_ref. If not, it must exist
3298 * in long_ref; trying to put it on the short list here is an
3299 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3301 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3302 /* Just mark the second field valid */
3303 s->current_picture_ptr->reference = PICT_FRAME;
3304 } else if (s->current_picture_ptr->long_ref) {
3305 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3306 "assignment for second field "
3307 "in complementary field pair "
3308 "(first field is long term)\n");
3310 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3312 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* insert the current picture at the head of the short-term list */
3315 if(h->short_ref_count)
3316 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3318 h->short_ref[0]= s->current_picture_ptr;
3319 h->short_ref_count++;
3320 s->current_picture_ptr->reference |= s->picture_structure;
3324 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3326 /* We have too many reference frames, probably due to corrupted
3327 * stream. Need to discard one frame. Prevents overrun of the
3328 * short_ref and long_ref buffers.
3330 av_log(h->s.avctx, AV_LOG_ERROR,
3331 "number of reference frames exceeds max (probably "
3332 "corrupt input), discarding one\n");
3334 if (h->long_ref_count && !h->short_ref_count) {
3335 for (i = 0; i < 16; ++i)
3340 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3342 pic = h->short_ref[h->short_ref_count - 1];
3343 remove_short(h, pic->frame_num, 0);
3347 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get a synthesized MMCO_LONG; otherwise either explicit
 * adaptive MMCO opcodes are read, or a sliding-window MMCO_SHORT2UNUSED
 * is synthesized when the reference lists are full.
 * NOTE(review): elided listing — the long_term_reference_flag branch,
 * mmco_count bookkeeping and returns are not visible here.
 */
3352 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3353 MpegEncContext * const s = &h->s;
3357 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: nonzero (==-1+1 trick) breaks the link */
3358 s->broken_link= get_bits1(gb) -1;
3360 h->mmco[0].opcode= MMCO_LONG;
3361 h->mmco[0].long_arg= 0;
3365 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3366 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3367 MMCOOpcode opcode= get_ue_golomb(gb);
3369 h->mmco[i].opcode= opcode;
3370 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num (mod max_pic_num) */
3371 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3372 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3373 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3377 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3378 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures may address up to 32 long-term field indices */
3379 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3383 h->mmco[i].long_arg= long_arg;
3386 if(opcode > (unsigned)MMCO_LONG){
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3390 if(opcode == MMCO_END)
/* sliding-window mode: synthesize removal of the oldest short ref */
3395 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3397 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3398 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3399 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3400 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3402 if (FIELD_PICTURE) {
/* remove both fields of the oldest frame */
3403 h->mmco[0].short_pic_num *= 2;
3404 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3405 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types of H.264 clause 8.2.1: type 0 (lsb/msb wraparound),
 * type 1 (cycle of expected deltas from the SPS) and type 2 (derived
 * from frame_num). Stores per-field POCs and the frame POC (their min).
 * NOTE(review): elided listing — field_poc declaration, the poc-type-2
 * tail and the field_poc[0] assignment for type 0 are not visible.
 */
3415 static int init_poc(H264Context *h){
3416 MpegEncContext * const s = &h->s;
3417 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3419 Picture *cur = s->current_picture_ptr;
3421 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped since the previous picture */
3422 if(h->frame_num < h->prev_frame_num)
3423 h->frame_num_offset += max_frame_num;
3425 if(h->sps.poc_type==0){
3426 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect lsb wraparound in either direction (spec eq. 8-3) */
3428 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3429 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3430 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3431 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3433 h->poc_msb = h->prev_poc_msb;
3434 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3436 field_poc[1] = h->poc_msb + h->poc_lsb;
3437 if(s->picture_structure == PICT_FRAME)
3438 field_poc[1] += h->delta_poc_bottom;
3439 }else if(h->sps.poc_type==1){
3440 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3443 if(h->sps.poc_cycle_length != 0)
3444 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures are excluded from the cycle count */
3448 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3451 expected_delta_per_poc_cycle = 0;
3452 for(i=0; i < h->sps.poc_cycle_length; i++)
3453 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3455 if(abs_frame_num > 0){
3456 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3457 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3459 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3460 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3461 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3465 if(h->nal_ref_idc == 0)
3466 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3468 field_poc[0] = expectedpoc + h->delta_poc[0];
3469 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3471 if(s->picture_structure == PICT_FRAME)
3472 field_poc[1] += h->delta_poc[1];
/* poc_type==2: POC follows decode order (tail elided) */
3474 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of fields actually present in this picture */
3483 if(s->picture_structure != PICT_BOTTOM_FIELD)
3484 s->current_picture_ptr->field_poc[0]= field_poc[0];
3485 if(s->picture_structure != PICT_TOP_FIELD)
3486 s->current_picture_ptr->field_poc[1]= field_poc[1];
3487 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3494 * initialize scan tables
/**
 * initialize scan tables
 * Copies the canonical zigzag/field scan orders into the context, or a
 * permuted variant (macro T) when a platform-specific IDCT with a
 * different coefficient layout is in use. Also selects the _q0 tables
 * used for lossless (transform-bypass) blocks.
 */
3496 static void init_scan_tables(H264Context *h){
3497 MpegEncContext * const s = &h->s;
/* C reference IDCT uses the canonical order; no permutation needed */
3499 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3500 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3501 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3503 for(i=0; i<16; i++){
/* swap the two 2-bit halves of the 4x4 index (transposed layout) */
3504 #define T(x) (x>>2) | ((x<<2) & 0xF)
3505 h->zigzag_scan[i] = T(zigzag_scan[i]);
3506 h-> field_scan[i] = T( field_scan[i]);
3510 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3511 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3512 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3513 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3514 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3516 for(i=0; i<64; i++){
/* 8x8 analogue: swap row/column of the scan index */
3517 #define T(x) (x>>3) | ((x&7)<<3)
3518 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3519 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3520 h->field_scan8x8[i] = T(field_scan8x8[i]);
3521 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* bypass blocks skip the IDCT, so they always use the canonical order */
3525 if(h->sps.transform_bypass){ //FIXME same ugly
3526 h->zigzag_scan_q0 = zigzag_scan;
3527 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3528 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3529 h->field_scan_q0 = field_scan;
3530 h->field_scan8x8_q0 = field_scan8x8;
3531 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3533 h->zigzag_scan_q0 = h->zigzag_scan;
3534 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3535 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3536 h->field_scan_q0 = h->field_scan;
3537 h->field_scan8x8_q0 = h->field_scan8x8;
3538 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3543 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context to thread contexts.
 * Shallow-copies per-frame state (current picture, POC prediction state,
 * reference lists, dequant tables) so worker threads decode slices with
 * the same frame-level view as the master context.
 */
3545 static void clone_slice(H264Context *dst, H264Context *src)
3547 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3548 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3549 dst->s.current_picture = src->s.current_picture;
3550 dst->s.linesize = src->s.linesize;
3551 dst->s.uvlinesize = src->s.uvlinesize;
3552 dst->s.first_field = src->s.first_field;
3554 dst->prev_poc_msb = src->prev_poc_msb;
3555 dst->prev_poc_lsb = src->prev_poc_lsb;
3556 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3557 dst->prev_frame_num = src->prev_frame_num;
3558 dst->short_ref_count = src->short_ref_count;
/* reference arrays hold Picture pointers; the Pictures stay shared */
3560 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3561 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3562 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3563 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3565 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3566 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3570 * decodes a slice header.
3571 * This will also call MPV_common_init() and frame_start() as needed.
3573 * @param h h264context
3574 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3576 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * decodes a slice header (see contract in the comment block above).
 * Order of work: parse first_mb/slice_type/pps_id, resolve PPS+SPS,
 * (re)initialize the MPEG context on dimension change, handle frame_num
 * gaps and field pairing, start the frame, compute POC, parse reference
 * counts/reordering/weight tables, ref-pic marking, CABAC init, QP and
 * deblocking parameters, then set up per-slice ref2frm tables.
 * NOTE(review): heavily elided listing — many returns, else-branches,
 * closing braces and an #if 0 FMO section are not visible here.
 */
3578 static int decode_slice_header(H264Context *h, H264Context *h0){
3579 MpegEncContext * const s = &h->s;
3580 MpegEncContext * const s0 = &h0->s;
3581 unsigned int first_mb_in_slice;
3582 unsigned int pps_id;
3583 int num_ref_idx_active_override_flag;
3584 unsigned int slice_type, tmp, i, j;
3585 int default_ref_list_done = 0;
3586 int last_pic_structure;
/* non-reference slices may be dropped without harming prediction */
3588 s->dropable= h->nal_ref_idc == 0;
3590 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3591 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3592 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3594 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3595 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3598 first_mb_in_slice= get_ue_golomb(&s->gb);
3600 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3601 h0->current_slice = 0;
3602 if (!s0->first_field)
3603 s->current_picture_ptr= NULL;
3606 slice_type= get_ue_golomb(&s->gb);
3608 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type > 4 means "fixed for the whole picture" (value - 5) */
3613 h->slice_type_fixed=1;
3615 h->slice_type_fixed=0;
3617 slice_type= golomb_to_pict_type[ slice_type ];
3618 if (slice_type == FF_I_TYPE
3619 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3620 default_ref_list_done = 1;
3622 h->slice_type= slice_type;
/* slice_type_nos: slice type with the switching (S) bit masked off */
3623 h->slice_type_nos= slice_type & 3;
3625 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3626 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3627 av_log(h->s.avctx, AV_LOG_ERROR,
3628 "B picture before any references, skipping\n");
3632 pps_id= get_ue_golomb(&s->gb);
3633 if(pps_id>=MAX_PPS_COUNT){
3634 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3637 if(!h0->pps_buffers[pps_id]) {
3638 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3641 h->pps= *h0->pps_buffers[pps_id];
3643 if(!h0->sps_buffers[h->pps.sps_id]) {
3644 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3647 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS; rebuild only on master context */
3649 if(h == h0 && h->dequant_coeff_pps != pps_id){
3650 h->dequant_coeff_pps = pps_id;
3651 init_dequant_tables(h);
3654 s->mb_width= h->sps.mb_width;
3655 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3657 h->b_stride= s->mb_width*4;
3658 h->b8_stride= s->mb_width*2;
/* apply SPS cropping to derive the displayed dimensions */
3660 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3661 if(h->sps.frame_mbs_only_flag)
3662 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3664 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3666 if (s->context_initialized
3667 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3669 return -1; // width / height changed during parallelized decoding
3671 flush_dpb(s->avctx);
3674 if (!s->context_initialized) {
3676 return -1; // we cant (re-)initialize context during parallel decoding
3677 if (MPV_common_init(s) < 0)
3681 init_scan_tables(h);
/* clone per-thread H264 contexts on top of the MPEG thread contexts */
3684 for(i = 1; i < s->avctx->thread_count; i++) {
3686 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3687 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3688 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3691 init_scan_tables(c);
3695 for(i = 0; i < s->avctx->thread_count; i++)
3696 if(context_init(h->thread_context[i]) < 0)
3699 s->avctx->width = s->width;
3700 s->avctx->height = s->height;
3701 s->avctx->sample_aspect_ratio= h->sps.sar;
3702 if(!s->avctx->sample_aspect_ratio.den)
3703 s->avctx->sample_aspect_ratio.den = 1;
3705 if(h->sps.timing_info_present_flag){
3706 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around a timestamp bug in old x264 versions */
3707 if(h->x264_build > 0 && h->x264_build < 44)
3708 s->avctx->time_base.den *= 2;
3709 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3710 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3714 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3717 h->mb_aff_frame = 0;
3718 last_pic_structure = s0->picture_structure;
3719 if(h->sps.frame_mbs_only_flag){
3720 s->picture_structure= PICT_FRAME;
3722 if(get_bits1(&s->gb)) { //field_pic_flag
3723 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3725 s->picture_structure= PICT_FRAME;
3726 h->mb_aff_frame = h->sps.mb_aff;
3729 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3731 if(h0->current_slice == 0){
/* conceal frame_num gaps by synthesizing the missing frames */
3732 while(h->frame_num != h->prev_frame_num &&
3733 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3734 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3736 h->prev_frame_num++;
3737 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3738 s->current_picture_ptr->frame_num= h->prev_frame_num;
3739 execute_ref_pic_marking(h, NULL, 0);
3742 /* See if we have a decoded first field looking for a pair... */
3743 if (s0->first_field) {
3744 assert(s0->current_picture_ptr);
3745 assert(s0->current_picture_ptr->data[0]);
3746 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3748 /* figure out if we have a complementary field pair */
3749 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3751 * Previous field is unmatched. Don't display it, but let it
3752 * remain for reference if marked as such.
3754 s0->current_picture_ptr = NULL;
3755 s0->first_field = FIELD_PICTURE;
3758 if (h->nal_ref_idc &&
3759 s0->current_picture_ptr->reference &&
3760 s0->current_picture_ptr->frame_num != h->frame_num) {
3762 * This and previous field were reference, but had
3763 * different frame_nums. Consider this field first in
3764 * pair. Throw away previous field except for reference
3767 s0->first_field = 1;
3768 s0->current_picture_ptr = NULL;
3771 /* Second field in complementary pair */
3772 s0->first_field = 0;
3777 /* Frame or first field in a potentially complementary pair */
3778 assert(!s0->current_picture_ptr);
3779 s0->first_field = FIELD_PICTURE;
3782 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3783 s0->first_field = 0;
3790 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3792 assert(s->mb_num == s->mb_width * s->mb_height);
3793 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3794 first_mb_in_slice >= s->mb_num){
3795 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3798 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3799 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3800 if (s->picture_structure == PICT_BOTTOM_FIELD)
3801 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3802 assert(s->mb_y < s->mb_height);
/* pic_num space doubles for field pictures (spec 8.2.4.1) */
3804 if(s->picture_structure==PICT_FRAME){
3805 h->curr_pic_num= h->frame_num;
3806 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3808 h->curr_pic_num= 2*h->frame_num + 1;
3809 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3812 if(h->nal_unit_type == NAL_IDR_SLICE){
3813 get_ue_golomb(&s->gb); /* idr_pic_id */
3816 if(h->sps.poc_type==0){
3817 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3819 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3820 h->delta_poc_bottom= get_se_golomb(&s->gb);
3824 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3825 h->delta_poc[0]= get_se_golomb(&s->gb);
3827 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3828 h->delta_poc[1]= get_se_golomb(&s->gb);
3833 if(h->pps.redundant_pic_cnt_present){
3834 h->redundant_pic_count= get_ue_golomb(&s->gb);
3837 //set defaults, might be overridden a few lines later
3838 h->ref_count[0]= h->pps.ref_count[0];
3839 h->ref_count[1]= h->pps.ref_count[1];
3841 if(h->slice_type_nos != FF_I_TYPE){
3842 if(h->slice_type_nos == FF_B_TYPE){
3843 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3845 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3847 if(num_ref_idx_active_override_flag){
3848 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3849 if(h->slice_type_nos==FF_B_TYPE)
3850 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned compare also catches ref_count==0 (wraps to huge value) */
3852 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3853 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3854 h->ref_count[0]= h->ref_count[1]= 1;
3858 if(h->slice_type_nos == FF_B_TYPE)
3865 if(!default_ref_list_done){
3866 fill_default_ref_list(h);
3869 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3872 if(h->slice_type_nos!=FF_I_TYPE){
3873 s->last_picture_ptr= &h->ref_list[0][0];
3874 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3876 if(h->slice_type_nos==FF_B_TYPE){
3877 s->next_picture_ptr= &h->ref_list[1][0];
3878 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* explicit weights for weighted P / bipred==1 B; implicit for ==2 */
3881 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3882 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3883 pred_weight_table(h);
3884 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3885 implicit_weight_table(h);
3890 decode_ref_pic_marking(h0, &s->gb);
3893 fill_mbaff_ref_list(h);
3895 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3896 direct_dist_scale_factor(h);
3897 direct_ref_list_init(h);
3899 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3900 tmp = get_ue_golomb(&s->gb);
3902 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3905 h->cabac_init_idc= tmp;
3908 h->last_qscale_diff = 0;
3909 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3911 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3915 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3916 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3917 //FIXME qscale / qp ... stuff
3918 if(h->slice_type == FF_SP_TYPE){
3919 get_bits1(&s->gb); /* sp_for_switch_flag */
3921 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3922 get_se_golomb(&s->gb); /* slice_qs_delta */
3925 h->deblocking_filter = 1;
3926 h->slice_alpha_c0_offset = 0;
3927 h->slice_beta_offset = 0;
3928 if( h->pps.deblocking_filter_parameters_present ) {
3929 tmp= get_ue_golomb(&s->gb);
3931 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc 0/1 maps to internal enable 1/0 */
3934 h->deblocking_filter= tmp;
3935 if(h->deblocking_filter < 2)
3936 h->deblocking_filter^= 1; // 1<->0
3938 if( h->deblocking_filter ) {
3939 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3940 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3944 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3945 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3946 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3947 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3948 h->deblocking_filter= 0;
/* type-1 deblocking crosses slice boundaries: incompatible with
 * slice-parallel decoding, so fall back to sequential */
3950 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3951 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3952 /* Cheat slightly for speed:
3953 Do not bother to deblock across slices. */
3954 h->deblocking_filter = 2;
3956 h0->max_contexts = 1;
3957 if(!h0->single_decode_warning) {
3958 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3959 h0->single_decode_warning = 1;
3962 return 1; // deblocking switched inside frame
/* NOTE(review): FMO placeholder below — presumably inside #if 0 in the
 * full source (the `?` bit count is not valid C); guard elided here */
3967 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3968 slice_group_change_cycle= get_bits(&s->gb, ?);
3971 h0->last_slice_type = slice_type;
3972 h->slice_num = ++h0->current_slice;
3973 if(h->slice_num >= MAX_SLICES){
3974 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* ref2frm maps ref index -> 4*frame_num + reference bits, per list */
3978 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3982 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3983 +(h->ref_list[j][i].reference&3);
/* second half covers the MBAFF field-pair ref list (indices 16..47) */
3986 for(i=16; i<48; i++)
3987 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3988 +(h->ref_list[j][i].reference&3);
3991 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3992 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3994 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3995 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3997 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3999 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4000 pps_id, h->frame_num,
4001 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4002 h->ref_count[0], h->ref_count[1],
4004 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4006 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4007 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: counts leading zero bits up to the first
 * one-bit using the bitstream-reader cache macros (return elided here).
 */
4017 static inline int get_level_prefix(GetBitContext *gb){
4021 OPEN_READER(re, gb);
4022 UPDATE_CACHE(re, gb);
4023 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB side */
4025 log= 32 - av_log2(buf);
4027 print_bin(buf>>(32-log), log);
4028 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating one-bit */
4031 LAST_SKIP_BITS(re, gb, log);
4032 CLOSE_READER(re, gb);
/* Returns whether the 8x8 transform may be used for this macroblock:
 * every sub-partition must be 8x8 (or direct with 8x8 inference).
 * Loop header and returns are elided from this listing. */
4037 static inline int get_dct8x8_allowed(H264Context *h){
4040 if(!IS_SUB_8X8(h->sub_mb_type[i])
4041 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4048 * decodes a residual block.
4049 * @param n block index
4050 * @param scantable scantable
4051 * @param max_coeff number of coefficients in the block
4052 * @return <0 if an error occurred
/* CAVLC residual decoder (coeff_token, trailing ones, level prefixes/suffixes,
 * total_zeros, run_before), writing dequantized levels into block[] via
 * scantable.  NOTE(review): this chunk is gap-sampled; else-branches and
 * closing braces between visible statements are missing from this view. */
4054 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4055 MpegEncContext * const s = &h->s;
4056 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4058 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4060 //FIXME put trailing_onex into the context
/* coeff_token selection: chroma DC uses its own VLC; luma DC and regular
 * blocks pick a VLC table by the predicted non-zero count of neighbors. */
4062 if(n == CHROMA_DC_BLOCK_INDEX){
4063 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4064 total_coeff= coeff_token>>2;
4066 if(n == LUMA_DC_BLOCK_INDEX){
4067 total_coeff= pred_non_zero_count(h, 0);
4068 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4069 total_coeff= coeff_token>>2;
4071 total_coeff= pred_non_zero_count(h, n);
4072 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4073 total_coeff= coeff_token>>2;
4074 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4076 //FIXME set last_non_zero?
/* Bitstream sanity check: more coefficients than the block can hold means
 * corruption; an error is logged with the MB position. */
4082 if(total_coeff > (unsigned)max_coeff) {
4083 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* Trailing ones: up to three +/-1 levels whose signs are read as raw bits
 * (peeked via show_bits, then skipped). */
4087 trailing_ones= coeff_token&3;
4088 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4089 assert(total_coeff<=16);
4091 i = show_bits(gb, 3);
4092 skip_bits(gb, trailing_ones);
4093 level[0] = 1-((i&4)>>1);
4094 level[1] = 1-((i&2) );
4095 level[2] = 1-((i&1)<<1);
/* Remaining levels: Exp-Golomb-like prefix + variable-length suffix;
 * prefix>=15 uses an escape with an extra offset for long codes. */
4097 if(trailing_ones<total_coeff) {
4098 int level_code, mask;
4099 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4100 int prefix= get_level_prefix(gb);
4102 //first coefficient has suffix_length equal to 0 or 1
4103 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4105 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4107 level_code= (prefix<<suffix_length); //part
4108 }else if(prefix==14){
4110 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4112 level_code= prefix + get_bits(gb, 4); //part
4114 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4115 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4117 level_code += (1<<(prefix-3))-4096;
4120 if(trailing_ones < 3) level_code += 2;
/* Branchless sign decode: mask is all-ones when level_code is odd. */
4125 mask= -(level_code&1);
4126 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4128 //remaining coefficients have suffix_length > 0
4129 for(i=trailing_ones+1;i<total_coeff;i++) {
4130 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4131 prefix = get_level_prefix(gb);
4133 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4135 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4137 level_code += (1<<(prefix-3))-4096;
4139 mask= -(level_code&1);
4140 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* suffix_length grows once the decoded magnitude passes the per-length
 * threshold table. */
4141 if(level_code > suffix_limit[suffix_length])
/* total_zeros: skipped when the block is full (no zeros possible). */
4146 if(total_coeff == max_coeff)
4149 if(n == CHROMA_DC_BLOCK_INDEX)
4150 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4152 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scatter levels back to front; two near-identical loops exist — one plain
 * (qmul==NULL, presumably DC) and one applying dequant (level*qmul+32)>>6. */
4155 coeff_num = zeros_left + total_coeff - 1;
4156 j = scantable[coeff_num];
4158 block[j] = level[0];
4159 for(i=1;i<total_coeff;i++) {
4162 else if(zeros_left < 7){
4163 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4165 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4167 zeros_left -= run_before;
4168 coeff_num -= 1 + run_before;
4169 j= scantable[ coeff_num ];
4174 block[j] = (level[0] * qmul[j] + 32)>>6;
4175 for(i=1;i<total_coeff;i++) {
4178 else if(zeros_left < 7){
4179 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4181 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4183 zeros_left -= run_before;
4184 coeff_num -= 1 + run_before;
4185 j= scantable[ coeff_num ];
4187 block[j]= (level[i] * qmul[j] + 32)>>6;
/* A negative zeros_left after the runs indicates corrupt data. */
4192 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts the MBAFF field-decoding flag for a skipped MB from the left
 * neighbour if it is in the same slice, otherwise from the top neighbour
 * (fallback value on the elided line is not visible in this view). */
4199 static void predict_field_decoding_flag(H264Context *h){
4200 MpegEncContext * const s = &h->s;
4201 const int mb_xy= h->mb_xy;
4202 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4203 ? s->current_picture.mb_type[mb_xy-1]
4204 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4205 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* Both mb_mbaff and mb_field_decoding_flag follow the neighbour's
 * interlaced bit. */
4207 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4211 * decodes a P_SKIP or B_SKIP macroblock
/* Decodes a P_SKIP or B_SKIP macroblock: clears coefficient counts, derives
 * motion (direct prediction for B, pskip prediction for P), and writes back
 * MB state.  NOTE(review): gap-sampled; some branch/brace lines missing. */
4213 static void decode_mb_skip(H264Context *h){
4214 MpegEncContext * const s = &h->s;
4215 const int mb_xy= h->mb_xy;
/* No residual in a skipped MB: zero both the per-MB non-zero counts and
 * the cache. */
4218 memset(h->non_zero_count[mb_xy], 0, 16);
4219 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4222 mb_type|= MB_TYPE_INTERLACED;
4224 if( h->slice_type_nos == FF_B_TYPE )
4226 // just for fill_caches. pred_direct_motion will set the real mb_type
4227 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4229 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4230 pred_direct_motion(h, &mb_type);
4231 mb_type|= MB_TYPE_SKIP;
/* P-skip path: 16x16 L0 prediction with the standard pskip MV predictor. */
4236 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4238 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4239 pred_pskip_motion(h, &mx, &my);
4240 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4241 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* Commit decoded state for this MB. */
4244 write_back_motion(h, mb_type);
4245 s->current_picture.mb_type[mb_xy]= mb_type;
4246 s->current_picture.qscale_table[mb_xy]= s->qscale;
4247 h->slice_table[ mb_xy ]= h->slice_num;
4248 h->prev_mb_skipped= 1;
4252 * decodes a macroblock
4253 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock with CAVLC entropy coding: skip-run handling,
 * mb_type, intra prediction modes or inter partitions/refs/MVs, CBP, dquant
 * and residual blocks.  Returns 0 on success, negative on error.
 * NOTE(review): this chunk is gap-sampled; else-branches, closing braces and
 * some statements between the visible lines are missing from this view. */
4255 static int decode_mb_cavlc(H264Context *h){
4256 MpegEncContext * const s = &h->s;
4258 int partition_count;
4259 unsigned int mb_type, cbp;
4260 int dct8x8_allowed= h->pps.transform_8x8_mode;
4262 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4264 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4266 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4267 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* Skip-run: on non-I slices, read mb_skip_run lazily and handle skipped
 * MBs (with MBAFF field-flag prediction on top MBs of a pair). */
4269 if(h->slice_type_nos != FF_I_TYPE){
4270 if(s->mb_skip_run==-1)
4271 s->mb_skip_run= get_ue_golomb(&s->gb);
4273 if (s->mb_skip_run--) {
4274 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4275 if(s->mb_skip_run==0)
4276 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4278 predict_field_decoding_flag(h);
4285 if( (s->mb_y&1) == 0 )
4286 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4289 h->prev_mb_skipped= 0;
/* mb_type: mapped through the per-slice-type info tables; out-of-range
 * B/P values fall through to the intra decoding path. */
4291 mb_type= get_ue_golomb(&s->gb);
4292 if(h->slice_type_nos == FF_B_TYPE){
4294 partition_count= b_mb_type_info[mb_type].partition_count;
4295 mb_type= b_mb_type_info[mb_type].type;
4298 goto decode_intra_mb;
4300 }else if(h->slice_type_nos == FF_P_TYPE){
4302 partition_count= p_mb_type_info[mb_type].partition_count;
4303 mb_type= p_mb_type_info[mb_type].type;
4306 goto decode_intra_mb;
4309 assert(h->slice_type_nos == FF_I_TYPE);
4310 if(h->slice_type == FF_SI_TYPE && mb_type)
4314 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4318 cbp= i_mb_type_info[mb_type].cbp;
4319 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4320 mb_type= i_mb_type_info[mb_type].type;
4324 mb_type |= MB_TYPE_INTERLACED;
4326 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: byte-aligned raw samples copied straight into h->mb; qscale is
 * forced to 0 and all non-zero counts set to 16. */
4328 if(IS_INTRA_PCM(mb_type)){
4331 // We assume these blocks are very rare so we do not optimize it.
4332 align_get_bits(&s->gb);
4334 // The pixels are stored in the same order as levels in h->mb array.
4335 for(x=0; x < (CHROMA ? 384 : 256); x++){
4336 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4339 // In deblocking, the quantizer is 0
4340 s->current_picture.qscale_table[mb_xy]= 0;
4341 // All coeffs are present
4342 memset(h->non_zero_count[mb_xy], 16, 16);
4344 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs double the ref counts (undone near the end). */
4349 h->ref_count[0] <<= 1;
4350 h->ref_count[1] <<= 1;
4353 fill_caches(h, mb_type, 0);
/* Intra path: per-4x4 (optionally 8x8 DCT) prediction modes, or 16x16
 * mode check, then chroma prediction mode. */
4356 if(IS_INTRA(mb_type)){
4358 // init_top_left_availability(h);
4359 if(IS_INTRA4x4(mb_type)){
4362 if(dct8x8_allowed && get_bits1(&s->gb)){
4363 mb_type |= MB_TYPE_8x8DCT;
4367 // fill_intra4x4_pred_table(h);
4368 for(i=0; i<16; i+=di){
4369 int mode= pred_intra_mode(h, i);
4371 if(!get_bits1(&s->gb)){
4372 const int rem_mode= get_bits(&s->gb, 3);
4373 mode = rem_mode + (rem_mode >= mode);
4377 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4379 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4381 write_back_intra_pred_mode(h);
4382 if( check_intra4x4_pred_mode(h) < 0)
4385 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4386 if(h->intra16x16_pred_mode < 0)
4390 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4393 h->chroma_pred_mode= pred_mode;
/* 8x8 partitions: sub_mb_type per 8x8 block, direct handling for B,
 * then refs and MVs per sub-partition. */
4395 }else if(partition_count==4){
4396 int i, j, sub_partition_count[4], list, ref[2][4];
4398 if(h->slice_type_nos == FF_B_TYPE){
4400 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4401 if(h->sub_mb_type[i] >=13){
4402 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4405 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4406 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4408 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4409 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4410 pred_direct_motion(h, &mb_type);
4411 h->ref_cache[0][scan8[4]] =
4412 h->ref_cache[1][scan8[4]] =
4413 h->ref_cache[0][scan8[12]] =
4414 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4417 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4419 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4420 if(h->sub_mb_type[i] >=4){
4421 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4424 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4425 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4429 for(list=0; list<h->list_count; list++){
4430 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4432 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4433 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4434 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4436 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4448 dct8x8_allowed = get_dct8x8_allowed(h);
4450 for(list=0; list<h->list_count; list++){
4452 if(IS_DIRECT(h->sub_mb_type[i])) {
4453 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4456 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4457 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4459 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4460 const int sub_mb_type= h->sub_mb_type[i];
4461 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4462 for(j=0; j<sub_partition_count[i]; j++){
4464 const int index= 4*i + block_width*j;
4465 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4466 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4467 mx += get_se_golomb(&s->gb);
4468 my += get_se_golomb(&s->gb);
4469 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the MV over the cache cells covered by the sub-partition
 * shape (8x8 / 8x4 / 4x8 / 4x4). */
4471 if(IS_SUB_8X8(sub_mb_type)){
4473 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4475 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4476 }else if(IS_SUB_8X4(sub_mb_type)){
4477 mv_cache[ 1 ][0]= mx;
4478 mv_cache[ 1 ][1]= my;
4479 }else if(IS_SUB_4X8(sub_mb_type)){
4480 mv_cache[ 8 ][0]= mx;
4481 mv_cache[ 8 ][1]= my;
4483 mv_cache[ 0 ][0]= mx;
4484 mv_cache[ 0 ][1]= my;
4487 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4493 }else if(IS_DIRECT(mb_type)){
4494 pred_direct_motion(h, &mb_type);
4495 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* Non-8x8 inter path: 16x16, 16x8 or 8x16 — refs first, then MVs. */
4497 int list, mx, my, i;
4498 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4499 if(IS_16X16(mb_type)){
4500 for(list=0; list<h->list_count; list++){
4502 if(IS_DIR(mb_type, 0, list)){
4503 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4504 if(val >= h->ref_count[list]){
4505 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4509 val= LIST_NOT_USED&0xFF;
4510 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4512 for(list=0; list<h->list_count; list++){
4514 if(IS_DIR(mb_type, 0, list)){
4515 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4516 mx += get_se_golomb(&s->gb);
4517 my += get_se_golomb(&s->gb);
4518 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4520 val= pack16to32(mx,my);
4523 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4526 else if(IS_16X8(mb_type)){
4527 for(list=0; list<h->list_count; list++){
4530 if(IS_DIR(mb_type, i, list)){
4531 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4532 if(val >= h->ref_count[list]){
4533 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4537 val= LIST_NOT_USED&0xFF;
4538 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4541 for(list=0; list<h->list_count; list++){
4544 if(IS_DIR(mb_type, i, list)){
4545 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4546 mx += get_se_golomb(&s->gb);
4547 my += get_se_golomb(&s->gb);
4548 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4550 val= pack16to32(mx,my);
4553 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4557 assert(IS_8X16(mb_type));
4558 for(list=0; list<h->list_count; list++){
4561 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4562 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4563 if(val >= h->ref_count[list]){
4564 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4568 val= LIST_NOT_USED&0xFF;
4569 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4572 for(list=0; list<h->list_count; list++){
4575 if(IS_DIR(mb_type, i, list)){
4576 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4577 mx += get_se_golomb(&s->gb);
4578 my += get_se_golomb(&s->gb);
4579 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4581 val= pack16to32(mx,my);
4584 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4590 if(IS_INTER(mb_type))
4591 write_back_motion(h, mb_type);
/* CBP: read for non-I16x16 MBs and mapped through the golomb->cbp tables
 * (gray variants presumably for CHROMA==0 builds — confirm upstream). */
4593 if(!IS_INTRA16x16(mb_type)){
4594 cbp= get_ue_golomb(&s->gb);
4596 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4601 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4602 else cbp= golomb_to_inter_cbp [cbp];
4604 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4605 else cbp= golomb_to_inter_cbp_gray[cbp];
4610 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4611 if(get_bits1(&s->gb)){
4612 mb_type |= MB_TYPE_8x8DCT;
4613 h->cbp_table[mb_xy]= cbp;
4616 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residual decoding: scan table choice depends on interlacing, then
 * dquant, then per-8x8/4x4 luma and chroma decode_residual calls. */
4618 if(cbp || IS_INTRA16x16(mb_type)){
4619 int i8x8, i4x4, chroma_idx;
4621 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4622 const uint8_t *scan, *scan8x8, *dc_scan;
4624 // fill_non_zero_count_cache(h);
4626 if(IS_INTERLACED(mb_type)){
4627 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4628 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4629 dc_scan= luma_dc_field_scan;
4631 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4632 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4633 dc_scan= luma_dc_zigzag_scan;
4636 dquant= get_se_golomb(&s->gb);
4638 if( dquant > 25 || dquant < -26 ){
4639 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the H.264 QP update rule. */
4643 s->qscale += dquant;
4644 if(((unsigned)s->qscale) > 51){
4645 if(s->qscale<0) s->qscale+= 52;
4646 else s->qscale-= 52;
4649 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4650 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4651 if(IS_INTRA16x16(mb_type)){
4652 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4653 return -1; //FIXME continue if partitioned and other return -1 too
4656 assert((cbp&15) == 0 || (cbp&15) == 15);
4659 for(i8x8=0; i8x8<4; i8x8++){
4660 for(i4x4=0; i4x4<4; i4x4++){
4661 const int index= i4x4 + 4*i8x8;
4662 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4668 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4671 for(i8x8=0; i8x8<4; i8x8++){
4672 if(cbp & (1<<i8x8)){
4673 if(IS_8x8DCT(mb_type)){
4674 DCTELEM *buf = &h->mb[64*i8x8];
4676 for(i4x4=0; i4x4<4; i4x4++){
4677 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4678 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4681 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4682 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4684 for(i4x4=0; i4x4<4; i4x4++){
4685 const int index= i4x4 + 4*i8x8;
4687 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4693 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4694 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma: DC blocks (qmul==NULL) then AC blocks with chroma dequant. */
4700 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4701 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4707 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4708 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4709 for(i4x4=0; i4x4<4; i4x4++){
4710 const int index= 16 + 4*chroma_idx + i4x4;
4711 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4717 uint8_t * const nnz= &h->non_zero_count_cache[0];
4718 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4722 uint8_t * const nnz= &h->non_zero_count_cache[0];
4723 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4724 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4725 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4727 s->current_picture.qscale_table[mb_xy]= s->qscale;
4728 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling done above. */
4731 h->ref_count[0] >>= 1;
4732 h->ref_count[1] >>= 1;
/* CABAC mb_field_decoding_flag: context = number of same-slice interlaced
 * neighbours (left MB pair, above MB pair); decoded from state 70+ctx. */
4738 static int decode_cabac_field_decoding_flag(H264Context *h) {
4739 MpegEncContext * const s = &h->s;
4740 const int mb_x = s->mb_x;
4741 const int mb_y = s->mb_y & ~1;
4742 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4743 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4745 unsigned int ctx = 0;
4747 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4750 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4754 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type with CABAC: 0 = I4x4, 25 = PCM, otherwise an
 * I16x16 type built from cbp_luma/cbp_chroma/pred-mode bins.  intra_slice
 * selects the neighbour-based context for the first bin. */
4757 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4758 uint8_t *state= &h->cabac_state[ctx_base];
4762 MpegEncContext * const s = &h->s;
4763 const int mba_xy = h->left_mb_xy[0];
4764 const int mbb_xy = h->top_mb_xy;
4766 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4768 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4770 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4771 return 0; /* I4x4 */
4774 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4775 return 0; /* I4x4 */
/* Terminate bin distinguishes PCM from I16x16. */
4778 if( get_cabac_terminate( &h->cabac ) )
4779 return 25; /* PCM */
4781 mb_type = 1; /* I16x16 */
4782 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4783 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4784 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4785 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4786 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes mb_type with CABAC, dispatching on slice type: I delegates to the
 * intra decoder; P uses a 3-bin tree; B uses a direct-neighbour context and
 * a 4-bit suffix tree.  NOTE(review): some brace/else lines are missing in
 * this gap-sampled view. */
4790 static int decode_cabac_mb_type( H264Context *h ) {
4791 MpegEncContext * const s = &h->s;
4793 if( h->slice_type_nos == FF_I_TYPE ) {
4794 return decode_cabac_intra_mb_type(h, 3, 1);
4795 } else if( h->slice_type_nos == FF_P_TYPE ) {
4796 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4798 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4799 /* P_L0_D16x16, P_8x8 */
4800 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4802 /* P_L0_D8x16, P_L0_D16x8 */
4803 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* First P bin set => intra MB inside a P slice (offset 5). */
4806 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4809 const int mba_xy = h->left_mb_xy[0];
4810 const int mbb_xy = h->top_mb_xy;
4813 assert(h->slice_type_nos == FF_B_TYPE);
/* ctx counts same-slice non-direct neighbours. */
4815 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4817 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4820 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4821 return 0; /* B_Direct_16x16 */
4823 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4824 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4827 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4828 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4829 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4830 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4832 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4833 else if( bits == 13 ) {
/* bits==13 => intra MB inside a B slice (offset 23). */
4834 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4835 } else if( bits == 14 )
4836 return 11; /* B_L1_L0_8x16 */
4837 else if( bits == 15 )
4838 return 22; /* B_8x8 */
4840 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4841 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* CABAC mb_skip_flag: context = number of same-slice non-skipped neighbours
 * (left, top), with MBAFF-specific neighbour pairing; B slices use a state
 * offset (visible via the slice-type test before the final read). */
4845 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4846 MpegEncContext * const s = &h->s;
4850 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4851 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4854 && h->slice_table[mba_xy] == h->slice_num
4855 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4856 mba_xy += s->mb_stride;
4858 mbb_xy = mb_xy - s->mb_stride;
4860 && h->slice_table[mbb_xy] == h->slice_num
4861 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4862 mbb_xy -= s->mb_stride;
4864 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4866 int mb_xy = h->mb_xy;
4868 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4871 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4873 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4876 if( h->slice_type_nos == FF_B_TYPE )
4878 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* CABAC intra 4x4 prediction mode: first bin (state 68) selects the
 * predicted mode; otherwise a 3-bit remainder (state 69) is read and
 * adjusted past the predicted mode, as in the spec's rem_intra4x4_pred_mode. */
4881 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4884 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4887 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4888 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4889 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4891 if( mode >= pred_mode )
/* CABAC intra chroma prediction mode: neighbour-based context for the first
 * bin (states 64..66), then up to two truncated-unary bins at state 64+3. */
4897 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4898 const int mba_xy = h->left_mb_xy[0];
4899 const int mbb_xy = h->top_mb_xy;
4903 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4904 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4907 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4910 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4913 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4915 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* CABAC luma CBP: four bins (states 73..76); each 8x8 block's context is
 * derived from the already-decoded bits of this MB and the left/top
 * neighbour CBPs (-1 when the neighbour is in another slice). */
4921 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4922 int cbp_b, cbp_a, ctx, cbp = 0;
4924 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4925 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4927 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4928 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4929 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4930 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4931 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4932 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4933 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4934 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* CABAC chroma CBP (0/1/2): two bins at states 77+, contexts from the
 * neighbours' chroma CBP fields (top two bits of left_cbp/top_cbp). */
4937 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4941 cbp_a = (h->left_cbp>>4)&0x03;
4942 cbp_b = (h-> top_cbp>>4)&0x03;
4945 if( cbp_a > 0 ) ctx++;
4946 if( cbp_b > 0 ) ctx += 2;
4947 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
/* Second bin distinguishes "DC only" (1) from "DC+AC" (2). */
4951 if( cbp_a == 2 ) ctx++;
4952 if( cbp_b == 2 ) ctx += 2;
4953 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* CABAC mb_qp_delta: unary value at states 60+, context seeded by whether
 * the previous MB had a non-zero delta; val>102 aborts to avoid an
 * infinite loop on corrupt data.  Odd values map to positive deltas and
 * even values to negative per the visible return expression. */
4955 static int decode_cabac_mb_dqp( H264Context *h) {
4959 if( h->last_qscale_diff != 0 )
4962 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4968 if(val > 102) //prevent infinite loop
4975 return -(val + 1)/2;
/* CABAC P sub_mb_type: small binary tree over states 21..23 selecting one
 * of the four P sub-partition types. */
4977 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4978 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4980 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4982 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* CABAC B sub_mb_type: binary tree over states 36..39; 0 = B_Direct_8x8,
 * 1/2 = single-list 8x8, larger codes select smaller partitions. */
4986 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4988 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4989 return 0; /* B_Direct_8x8 */
4990 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4991 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4993 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4994 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4995 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4998 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4999 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* CABAC transform_size_8x8_flag: one bin, context chosen by how many
 * neighbours already use the 8x8 transform (neighbor_transform_size). */
5003 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5004 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* CABAC ref_idx: context from left/top cached ref indices (B slices ignore
 * refs of direct-predicted neighbours); unary decode over states 54+, with
 * a >=32 guard against corrupt streams. */
5007 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5008 int refa = h->ref_cache[list][scan8[n] - 1];
5009 int refb = h->ref_cache[list][scan8[n] - 8];
5013 if( h->slice_type_nos == FF_B_TYPE) {
5014 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5016 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5025 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5031 if(ref >= 32 /*h->ref_list[list]*/){
/* CABAC motion vector difference (component l of block n): context from the
 * sum of neighbour MVD magnitudes; unary prefix up to 9 over states
 * ctxbase+, then an exp-Golomb-style bypass suffix, then a bypass sign.
 * An overflow guard logs an error on absurd values. */
5038 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5039 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5040 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5041 int ctxbase = (l == 0) ? 40 : 47;
5043 int ctx = (amvd>2) + (amvd>32);
5045 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5050 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5058 while( get_cabac_bypass( &h->cabac ) ) {
5062 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5067 if( get_cabac_bypass( &h->cabac ) )
5071 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Context for the CABAC coded_block_flag: nza/nzb are the neighbours'
 * coded flags, sourced per category from the CBP bits (DC cases) or the
 * non_zero_count_cache (AC/luma cases); final context is offset by 4*cat. */
5074 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5080 nza = h->left_cbp&0x100;
5081 nzb = h-> top_cbp&0x100;
5083 nza = (h->left_cbp>>(6+idx))&0x01;
5084 nzb = (h-> top_cbp>>(6+idx))&0x01;
5088 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5089 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5091 assert(cat == 1 || cat == 2);
5092 nza = h->non_zero_count_cache[scan8[idx] - 1];
5093 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5103 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to its last_significant_coeff_flag
 * context offset; declared with asm alignment so the x86 assembly
 * significance decoder can reference it. */
5106 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5107 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5109 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5110 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* CABAC residual decoder: coded_block_flag, significance map, then levels
 * (abslevel state machine + bypass escape) written back to front through
 * scantable, with optional dequantization via qmul.  is_dc is a compile-time
 * flag so the two specializations fold their branches.  NOTE(review):
 * gap-sampled view; some brace/else lines between statements are missing. */
5113 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5114 static const int significant_coeff_flag_offset[2][6] = {
5115 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5116 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5118 static const int last_coeff_flag_offset[2][6] = {
5119 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5120 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5122 static const int coeff_abs_level_m1_offset[6] = {
5123 227+0, 227+10, 227+20, 227+30, 227+39, 426
5125 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5126 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5127 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5128 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5129 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5130 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5131 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5132 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5133 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5135 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5136 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5137 * map node ctx => cabac ctx for level=1 */
5138 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5139 /* map node ctx => cabac ctx for level>1 */
5140 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5141 static const uint8_t coeff_abs_level_transition[2][8] = {
5142 /* update node ctx after decoding a level=1 */
5143 { 1, 2, 3, 3, 4, 5, 6, 7 },
5144 /* update node ctx after decoding a level>1 */
5145 { 4, 4, 4, 4, 5, 6, 7, 7 }
5151 int coeff_count = 0;
5154 uint8_t *significant_coeff_ctx_base;
5155 uint8_t *last_coeff_ctx_base;
5156 uint8_t *abs_level_m1_ctx_base;
/* The CABAC state is copied into a local struct so the hot loop works on
 * stack/register copies; it is written back before every return. */
5159 #define CABAC_ON_STACK
5161 #ifdef CABAC_ON_STACK
5164 cc.range = h->cabac.range;
5165 cc.low = h->cabac.low;
5166 cc.bytestream= h->cabac.bytestream;
5168 #define CC &h->cabac
5172 /* cat: 0-> DC 16x16 n = 0
5173 * 1-> AC 16x16 n = luma4x4idx
5174 * 2-> Luma4x4 n = luma4x4idx
5175 * 3-> DC Chroma n = iCbCr
5176 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5177 * 5-> Luma8x8 n = 4 * luma8x8idx
5180 /* read coded block flag */
5181 if( is_dc || cat != 5 ) {
5182 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5185 h->non_zero_count_cache[scan8[16+n]] = 0;
5187 h->non_zero_count_cache[scan8[n]] = 0;
5190 #ifdef CABAC_ON_STACK
5191 h->cabac.range = cc.range ;
5192 h->cabac.low = cc.low ;
5193 h->cabac.bytestream= cc.bytestream;
5199 significant_coeff_ctx_base = h->cabac_state
5200 + significant_coeff_flag_offset[MB_FIELD][cat];
5201 last_coeff_ctx_base = h->cabac_state
5202 + last_coeff_flag_offset[MB_FIELD][cat];
5203 abs_level_m1_ctx_base = h->cabac_state
5204 + coeff_abs_level_m1_offset[cat];
5206 if( !is_dc && cat == 5 ) {
5207 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5208 for(last= 0; last < coefs; last++) { \
5209 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5210 if( get_cabac( CC, sig_ctx )) { \
5211 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5212 index[coeff_count++] = last; \
5213 if( get_cabac( CC, last_ctx ) ) { \
5219 if( last == max_coeff -1 ) {\
5220 index[coeff_count++] = last;\
5222 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 builds can take the assembly significance decoders when the
 * register constraints allow it. */
5223 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5224 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5226 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5228 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5230 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5233 assert(coeff_count > 0);
/* Record the coded-block information for the deblocker / neighbours. */
5237 h->cbp_table[h->mb_xy] |= 0x100;
5239 h->cbp_table[h->mb_xy] |= 0x40 << n;
5242 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5244 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5246 assert( cat == 1 || cat == 2 );
5247 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level decode, last coefficient first; node_ctx implements the spec's
 * abs_level context state machine via the transition tables above. */
5252 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5254 int j= scantable[index[--coeff_count]];
5256 if( get_cabac( CC, ctx ) == 0 ) {
5257 node_ctx = coeff_abs_level_transition[0][node_ctx];
5259 block[j] = get_cabac_bypass_sign( CC, -1);
5261 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5265 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5266 node_ctx = coeff_abs_level_transition[1][node_ctx];
5268 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* Magnitudes >= 15 continue with a bypass-coded exp-Golomb escape. */
5272 if( coeff_abs >= 15 ) {
5274 while( get_cabac_bypass( CC ) ) {
5280 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5286 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5288 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5291 } while( coeff_count );
5292 #ifdef CABAC_ON_STACK
5293 h->cabac.range = cc.range ;
5294 h->cabac.low = cc.low ;
5295 h->cabac.bytestream= cc.bytestream;
5300 #ifndef CONFIG_SMALL
/* Non-CONFIG_SMALL build: two thin wrappers force the is_dc argument of
 * decode_cabac_residual_internal to a constant (1 for DC, 0 for AC), so the
 * compiler can specialize the internal decoder for each case. */
5301 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5302     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5305 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5306     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher: categories 0 (luma DC) and 3 (chroma DC) take the DC path,
 * everything else the AC path. Two call forms appear below; the preprocessor
 * lines selecting between them are not visible in this excerpt —
 * NOTE(review): presumably a CONFIG_SMALL conditional, confirm in full file. */
5310 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5312     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5314     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5315     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the macroblock indices of the top and left neighbours
 * (h->top_mb_xy, h->left_mb_xy[0]) for the current MB, then corrects them
 * for MBAFF macroblock pairs and field pictures, where the straight
 * "mb_xy - mb_stride" / "mb_xy - 1" neighbours are not always the right ones. */
5319 static inline void compute_mb_neighbors(H264Context *h)
5321     MpegEncContext * const s = &h->s;
5322     const int mb_xy = h->mb_xy;
     /* default frame-coded neighbours */
5323     h->top_mb_xy = mb_xy - s->mb_stride;
5324     h->left_mb_xy[0] = mb_xy - 1;
     /* MBAFF case: work in units of MB pairs and compare the frame/field
      * coding flags of the current, top and left pairs */
5326         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5327         const int top_pair_xy      = pair_xy     - s->mb_stride;
5328         const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5329         const int left_mb_frame_flag     = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5330         const int curr_mb_frame_flag     = !MB_FIELD;
5331         const int bottom = (s->mb_y & 1);
5333                 ? !curr_mb_frame_flag // bottom macroblock
5334                 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5336             h->top_mb_xy -= s->mb_stride;
5338         if (left_mb_frame_flag != curr_mb_frame_flag) {
5339             h->left_mb_xy[0] = pair_xy - 1;
5341     } else if (FIELD_PICTURE) {
     /* field picture: vertical neighbour is two rows up in frame coordinates */
5342         h->top_mb_xy -= s->mb_stride;
5348  * decodes a macroblock
5349  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Full CABAC macroblock layer: skip flags, field decoding flag (MBAFF),
 * mb_type, IPCM passthrough, intra prediction modes or inter motion data,
 * coded block pattern, qscale delta and finally the residual coefficients. */
5351 static int decode_mb_cabac(H264Context *h) {
5352     MpegEncContext * const s = &h->s;
5354     int mb_type, partition_count, cbp = 0;
5355     int dct8x8_allowed= h->pps.transform_8x8_mode;
5357     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5359     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5361     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     /* --- skip-flag handling (P/B slices only) --- */
5362     if( h->slice_type_nos != FF_I_TYPE ) {
5364         /* a skipped mb needs the aff flag from the following mb */
5365         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5366             predict_field_decoding_flag(h);
5367         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5368             skip = h->next_mb_skipped;
5370             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5371         /* read skip flags */
5373             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5374                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5375                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5376                 if(h->next_mb_skipped)
5377                     predict_field_decoding_flag(h);
5379                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
     /* skipped MB: reset per-MB state used later by deblocking */
5384             h->cbp_table[mb_xy] = 0;
5385             h->chroma_pred_mode_table[mb_xy] = 0;
5386             h->last_qscale_diff = 0;
5393         if( (s->mb_y&1) == 0 )
5395                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5398     h->prev_mb_skipped = 0;
     /* --- mb_type --- */
5400     compute_mb_neighbors(h);
5401     mb_type = decode_cabac_mb_type( h );
5402     assert(mb_type >= 0);
5404     if( h->slice_type_nos == FF_B_TYPE ) {
5406             partition_count= b_mb_type_info[mb_type].partition_count;
5407             mb_type=         b_mb_type_info[mb_type].type;
5410             goto decode_intra_mb;
5412     } else if( h->slice_type_nos == FF_P_TYPE ) {
5414             partition_count= p_mb_type_info[mb_type].partition_count;
5415             mb_type=         p_mb_type_info[mb_type].type;
5418             goto decode_intra_mb;
5421         if(h->slice_type == FF_SI_TYPE && mb_type)
5423         assert(h->slice_type_nos == FF_I_TYPE);
5425         partition_count = 0;
5426         cbp= i_mb_type_info[mb_type].cbp;
5427         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5428         mb_type= i_mb_type_info[mb_type].type;
5431         mb_type |= MB_TYPE_INTERLACED;
5433     h->slice_table[ mb_xy ]= h->slice_num;
     /* --- IPCM: raw samples follow in the bitstream, bypassing CABAC --- */
5435     if(IS_INTRA_PCM(mb_type)) {
5438         // We assume these blocks are very rare so we do not optimize it.
5439         // FIXME The two following lines get the bitstream position in the cabac
5440         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5441         ptr= h->cabac.bytestream;
5442         if(h->cabac.low&0x1) ptr--;
5444             if(h->cabac.low&0x1FF) ptr--;
5447         // The pixels are stored in the same order as levels in h->mb array.
5448         memcpy(h->mb, ptr, 256); ptr+=256;
5450         memcpy(h->mb+128, ptr, 128); ptr+=128;
     /* restart the CABAC engine after the raw PCM bytes */
5453         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5455         // All blocks are present
5456         h->cbp_table[mb_xy] = 0x1ef;
5457         h->chroma_pred_mode_table[mb_xy] = 0;
5458         // In deblocking, the quantizer is 0
5459         s->current_picture.qscale_table[mb_xy]= 0;
5460         // All coeffs are present
5461         memset(h->non_zero_count[mb_xy], 16, 16);
5462         s->current_picture.mb_type[mb_xy]= mb_type;
5463         h->last_qscale_diff = 0;
     /* MBAFF field MB: temporarily double ref counts so per-field references
      * index correctly (undone at the end of the function) */
5468         h->ref_count[0] <<= 1;
5469         h->ref_count[1] <<= 1;
5472     fill_caches(h, mb_type, 0);
     /* --- intra prediction modes --- */
5474     if( IS_INTRA( mb_type ) ) {
5476         if( IS_INTRA4x4( mb_type ) ) {
5477             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5478                 mb_type |= MB_TYPE_8x8DCT;
5479                 for( i = 0; i < 16; i+=4 ) {
5480                     int pred = pred_intra_mode( h, i );
5481                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5482                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5485                 for( i = 0; i < 16; i++ ) {
5486                     int pred = pred_intra_mode( h, i );
5487                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5489                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5492             write_back_intra_pred_mode(h);
5493             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5495             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5496             if( h->intra16x16_pred_mode < 0 ) return -1;
5499         h->chroma_pred_mode_table[mb_xy] =
5500         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5502         pred_mode= check_intra_pred_mode( h, pred_mode );
5503         if( pred_mode < 0 ) return -1;
5504         h->chroma_pred_mode= pred_mode;
     /* --- 8x8 sub-macroblock partitions --- */
5506     } else if( partition_count == 4 ) {
5507         int i, j, sub_partition_count[4], list, ref[2][4];
5509         if( h->slice_type_nos == FF_B_TYPE ) {
5510             for( i = 0; i < 4; i++ ) {
5511                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5512                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5513                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5515             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5516                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5517                 pred_direct_motion(h, &mb_type);
5518                 h->ref_cache[0][scan8[4]] =
5519                 h->ref_cache[1][scan8[4]] =
5520                 h->ref_cache[0][scan8[12]] =
5521                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5522                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5523                     for( i = 0; i < 4; i++ )
5524                         if( IS_DIRECT(h->sub_mb_type[i]) )
5525                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5529             for( i = 0; i < 4; i++ ) {
5530                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5531                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5532                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
     /* reference indices per 8x8 block, validated against ref_count */
5536         for( list = 0; list < h->list_count; list++ ) {
5537                 for( i = 0; i < 4; i++ ) {
5538                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5539                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5540                         if( h->ref_count[list] > 1 ){
5541                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5542                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5543                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5551                     h->ref_cache[list][ scan8[4*i]+1 ]=
5552                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5557             dct8x8_allowed = get_dct8x8_allowed(h);
     /* motion vectors: one mvd per sub-partition, predicted then cached */
5559         for(list=0; list<h->list_count; list++){
5561                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5562                 if(IS_DIRECT(h->sub_mb_type[i])){
5563                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5567                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5568                     const int sub_mb_type= h->sub_mb_type[i];
5569                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5570                     for(j=0; j<sub_partition_count[i]; j++){
5573                         const int index= 4*i + block_width*j;
5574                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5575                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5576                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5578                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5579                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5580                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5582                         if(IS_SUB_8X8(sub_mb_type)){
5584                                 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5586                                 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5589                                 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5591                                 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5592                         }else if(IS_SUB_8X4(sub_mb_type)){
5593                             mv_cache[ 1 ][0]= mx;
5594                             mv_cache[ 1 ][1]= my;
5596                             mvd_cache[ 1 ][0]= mx - mpx;
5597                             mvd_cache[ 1 ][1]= my - mpy;
5598                         }else if(IS_SUB_4X8(sub_mb_type)){
5599                             mv_cache[ 8 ][0]= mx;
5600                             mv_cache[ 8 ][1]= my;
5602                             mvd_cache[ 8 ][0]= mx - mpx;
5603                             mvd_cache[ 8 ][1]= my - mpy;
5605                         mv_cache[ 0 ][0]= mx;
5606                         mv_cache[ 0 ][1]= my;
5608                         mvd_cache[ 0 ][0]= mx - mpx;
5609                         mvd_cache[ 0 ][1]= my - mpy;
5612                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5613                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5614                     p[0] = p[1] = p[8] = p[9] = 0;
5615                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
     /* --- B direct macroblock --- */
5619     } else if( IS_DIRECT(mb_type) ) {
5620         pred_direct_motion(h, &mb_type);
5621         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5622         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5623         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* --- 16x16 / 16x8 / 8x16 inter macroblocks --- */
5625         int list, mx, my, i, mpx, mpy;
5626         if(IS_16X16(mb_type)){
5627             for(list=0; list<h->list_count; list++){
5628                 if(IS_DIR(mb_type, 0, list)){
5630                     if(h->ref_count[list] > 1){
5631                         ref= decode_cabac_mb_ref(h, list, 0);
5632                         if(ref >= (unsigned)h->ref_count[list]){
5633                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5638                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5640                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5642             for(list=0; list<h->list_count; list++){
5643                 if(IS_DIR(mb_type, 0, list)){
5644                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5646                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5647                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5648                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5650                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5651                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5653                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5656         else if(IS_16X8(mb_type)){
5657             for(list=0; list<h->list_count; list++){
5659                         if(IS_DIR(mb_type, i, list)){
5661                             if(h->ref_count[list] > 1){
5662                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5663                                 if(ref >= (unsigned)h->ref_count[list]){
5664                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5669                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5671                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5674             for(list=0; list<h->list_count; list++){
5676                     if(IS_DIR(mb_type, i, list)){
5677                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5678                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5679                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5680                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5682                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5683                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5685                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5686                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5691             assert(IS_8X16(mb_type));
5692             for(list=0; list<h->list_count; list++){
5694                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5696                             if(h->ref_count[list] > 1){
5697                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5698                                 if(ref >= (unsigned)h->ref_count[list]){
5699                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5704                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5706                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5709             for(list=0; list<h->list_count; list++){
5711                     if(IS_DIR(mb_type, i, list)){
5712                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5713                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5714                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5716                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5717                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5718                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5720                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5721                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5728     if( IS_INTER( mb_type ) ) {
5729         h->chroma_pred_mode_table[mb_xy] = 0;
5730         write_back_motion( h, mb_type );
     /* --- coded block pattern --- */
5733     if( !IS_INTRA16x16( mb_type ) ) {
5734         cbp  = decode_cabac_mb_cbp_luma( h );
5736             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5739     h->cbp_table[mb_xy] = h->cbp = cbp;
5741     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5742         if( decode_cabac_mb_transform_size( h ) )
5743             mb_type |= MB_TYPE_8x8DCT;
5745     s->current_picture.mb_type[mb_xy]= mb_type;
     /* --- residuals: pick scan tables, decode dqp, then coefficients --- */
5747     if( cbp || IS_INTRA16x16( mb_type ) ) {
5748         const uint8_t *scan, *scan8x8, *dc_scan;
5749         const uint32_t *qmul;
5752         if(IS_INTERLACED(mb_type)){
5753             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5754             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5755             dc_scan= luma_dc_field_scan;
5757             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5758             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5759             dc_scan= luma_dc_zigzag_scan;
5762         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5763         if( dqp == INT_MIN ){
5764             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
     /* wrap qscale into the legal 0..51 range */
5768         if(((unsigned)s->qscale) > 51){
5769             if(s->qscale<0) s->qscale+= 52;
5770             else            s->qscale-= 52;
5772         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5773         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5775         if( IS_INTRA16x16( mb_type ) ) {
5777             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5778             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5781                 qmul = h->dequant4_coeff[0][s->qscale];
5782                 for( i = 0; i < 16; i++ ) {
5783                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5784                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5787                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5791             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5792                 if( cbp & (1<<i8x8) ) {
5793                     if( IS_8x8DCT(mb_type) ) {
5794                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5795                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5797                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5798                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5799                             const int index = 4*i8x8 + i4x4;
5800                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5802                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5803                             //STOP_TIMER("decode_residual")
5807                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5808                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
     /* chroma DC then chroma AC */
5815             for( c = 0; c < 2; c++ ) {
5816                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5817                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5823             for( c = 0; c < 2; c++ ) {
5824                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5825                 for( i = 0; i < 4; i++ ) {
5826                     const int index = 16 + 4 * c + i;
5827                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5828                     decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5832             uint8_t * const nnz= &h->non_zero_count_cache[0];
5833             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5834             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
     /* no residuals at all: clear the non-zero-count cache */
5837         uint8_t * const nnz= &h->non_zero_count_cache[0];
5838         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5839         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5840         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5841         h->last_qscale_diff = 0;
5844     s->current_picture.qscale_table[mb_xy]= s->qscale;
5845     write_back_non_zero_count(h);
     /* undo the MBAFF ref_count doubling from above */
5848         h->ref_count[0] >>= 1;
5849         h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall).
 * bS < 4: delegates to the DSP normal-strength filter with per-block tc0.
 * bS == 4 (intra edge): strong filter applied pixel row by pixel row below. */
5856 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5858     const int index_a = qp + h->slice_alpha_c0_offset;
5859     const int alpha = (alpha_table+52)[index_a];
5860     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
     /* tc[i] = -1 marks a bS==0 block: the DSP filter skips it */
5865             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5866         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5868         /* 16px edge length, because bS=4 is triggered by being at
5869          * the edge of an intra MB, so all 4 bS are the same */
5870         for( d = 0; d < 16; d++ ) {
5871             const int p0 = pix[-1];
5872             const int p1 = pix[-2];
5873             const int p2 = pix[-3];
5875             const int q0 = pix[0];
5876             const int q1 = pix[1];
5877             const int q2 = pix[2];
5879             if( FFABS( p0 - q0 ) < alpha &&
5880                 FFABS( p1 - p0 ) < beta &&
5881                 FFABS( q1 - q0 ) < beta ) {
     /* small gradient across the edge: full strong filtering (3 taps per side) */
5883                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5884                     if( FFABS( p2 - p0 ) < beta)
5886                         const int p3 = pix[-4];
5888                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5889                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5890                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5893                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5895                     if( FFABS( q2 - q0 ) < beta)
5897                         const int q3 = pix[3];
5899                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5900                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5901                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5904                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* large gradient: only p0/q0 are touched */
5908                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5909                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5911                 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: normal strength via the DSP chroma
 * filter (tc0 + 1 per the chroma clipping rule), intra strength via the
 * dedicated intra chroma filter. */
5917 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5919     const int index_a = qp + h->slice_alpha_c0_offset;
5920     const int alpha = (alpha_table+52)[index_a];
5921     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
     /* chroma uses tc0+1; tc[i]==0 marks a bS==0 block to skip */
5926             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5927         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5929         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF variant of the vertical luma edge filter: each of the 16 rows may
 * belong to a different field, so bS (8 entries) and qp (2 entries) are
 * selected per row instead of per 4-row block. */
5933 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5935     for( i = 0; i < 16; i++, pix += stride) {
5941         int bS_index = (i >> 1);
5944             bS_index |= (i & 1);
5947         if( bS[bS_index] == 0 ) {
     /* pick the qp of the field this row belongs to */
5951         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5952         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5953         alpha = (alpha_table+52)[index_a];
5954         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
     /* normal-strength filtering (bS 1..3) */
5956         if( bS[bS_index] < 4 ) {
5957             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5958             const int p0 = pix[-1];
5959             const int p1 = pix[-2];
5960             const int p2 = pix[-3];
5961             const int q0 = pix[0];
5962             const int q1 = pix[1];
5963             const int q2 = pix[2];
5965             if( FFABS( p0 - q0 ) < alpha &&
5966                 FFABS( p1 - p0 ) < beta &&
5967                 FFABS( q1 - q0 ) < beta ) {
5971                 if( FFABS( p2 - p0 ) < beta ) {
5972                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5975                 if( FFABS( q2 - q0 ) < beta ) {
5976                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5980                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5981                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5982                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5983                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* strong filtering (bS == 4), same maths as filter_mb_edgev */
5986             const int p0 = pix[-1];
5987             const int p1 = pix[-2];
5988             const int p2 = pix[-3];
5990             const int q0 = pix[0];
5991             const int q1 = pix[1];
5992             const int q2 = pix[2];
5994             if( FFABS( p0 - q0 ) < alpha &&
5995                 FFABS( p1 - p0 ) < beta &&
5996                 FFABS( q1 - q0 ) < beta ) {
5998                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5999                     if( FFABS( p2 - p0 ) < beta)
6001                         const int p3 = pix[-4];
6003                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6004                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6005                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6008                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6010                     if( FFABS( q2 - q0 ) < beta)
6012                         const int q3 = pix[3];
6014                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6015                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6016                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6019                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6023                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6024                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6026                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF variant of the vertical chroma edge filter: 8 rows, with per-row
 * bS and per-field qp selection, filtering only p0/q0 (plus the strong
 * intra formula for bS == 4). */
6031 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6033     for( i = 0; i < 8; i++, pix += stride) {
6041         if( bS[bS_index] == 0 ) {
     /* pick the qp of the field this row belongs to */
6045         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6046         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6047         alpha = (alpha_table+52)[index_a];
6048         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
     /* normal strength: clipped delta applied to p0/q0 (chroma uses tc0+1) */
6050         if( bS[bS_index] < 4 ) {
6051             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6052             const int p0 = pix[-1];
6053             const int p1 = pix[-2];
6054             const int q0 = pix[0];
6055             const int q1 = pix[1];
6057             if( FFABS( p0 - q0 ) < alpha &&
6058                 FFABS( p1 - p0 ) < beta &&
6059                 FFABS( q1 - q0 ) < beta ) {
6060                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6062                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6063                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6064                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
     /* strong (intra) chroma filtering */
6067             const int p0 = pix[-1];
6068             const int p1 = pix[-2];
6069             const int q0 = pix[0];
6070             const int q1 = pix[1];
6072             if( FFABS( p0 - q0 ) < alpha &&
6073                 FFABS( p1 - p0 ) < beta &&
6074                 FFABS( q1 - q0 ) < beta ) {
6076                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6077                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6078                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev with the
 * p/q samples addressed across rows (offsets are multiples of stride). */
6084 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6086     const int index_a = qp + h->slice_alpha_c0_offset;
6087     const int alpha = (alpha_table+52)[index_a];
6088     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6089     const int pix_next  = stride;
     /* tc[i] = -1 marks a bS==0 block: the DSP filter skips it */
6094             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6095         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6097         /* 16px edge length, see filter_mb_edgev */
6098         for( d = 0; d < 16; d++ ) {
6099             const int p0 = pix[-1*pix_next];
6100             const int p1 = pix[-2*pix_next];
6101             const int p2 = pix[-3*pix_next];
6102             const int q0 = pix[0];
6103             const int q1 = pix[1*pix_next];
6104             const int q2 = pix[2*pix_next];
6106             if( FFABS( p0 - q0 ) < alpha &&
6107                 FFABS( p1 - p0 ) < beta &&
6108                 FFABS( q1 - q0 ) < beta ) {
6110                 const int p3 = pix[-4*pix_next];
6111                 const int q3 = pix[ 3*pix_next];
     /* small gradient: full strong filtering (3 taps per side) */
6113                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6114                     if( FFABS( p2 - p0 ) < beta) {
6116                         pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6117                         pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6118                         pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6121                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6123                     if( FFABS( q2 - q0 ) < beta) {
6125                         pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6126                         pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6127                         pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6130                         pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
     /* large gradient: only p0/q0 are touched */
6134                     pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6135                     pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6137                 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv using the
 * vertical-direction DSP chroma filters. */
6144 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6146     const int index_a = qp + h->slice_alpha_c0_offset;
6147     const int alpha = (alpha_table+52)[index_a];
6148     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
     /* chroma uses tc0+1; tc[i]==0 marks a bS==0 block to skip */
6153             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6154         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6156         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of a whole macroblock. Falls back to the full
 * filter_mb() for cases it cannot handle (picture edges, per-plane chroma
 * qp offsets, missing DSP strength function, cross-slice deblocking);
 * otherwise computes averaged edge QPs, skips the MB entirely when all QPs
 * are below the trivial threshold, and uses fixed bS values for intra MBs
 * or the DSP h264_loop_filter_strength() for inter MBs. */
6160 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6161     MpegEncContext * const s = &h->s;
6162     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6164     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
     /* fallback conditions the fast path cannot handle */
6168     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6169        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6170        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6171                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6172         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6175     assert(!FRAME_MBAFF);
     /* edge QPs are the average of the two adjacent macroblocks' QPs */
6177     mb_type = s->current_picture.mb_type[mb_xy];
6178     qp = s->current_picture.qscale_table[mb_xy];
6179     qp0 = s->current_picture.qscale_table[mb_xy-1];
6180     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6181     qpc = get_chroma_qp( h, 0, qp );
6182     qpc0 = get_chroma_qp( h, 0, qp0 );
6183     qpc1 = get_chroma_qp( h, 0, qp1 );
6184     qp0 = (qp + qp0 + 1) >> 1;
6185     qp1 = (qp + qp1 + 1) >> 1;
6186     qpc0 = (qpc + qpc0 + 1) >> 1;
6187     qpc1 = (qpc + qpc1 + 1) >> 1;
6188     qp_thresh = 15 - h->slice_alpha_c0_offset;
     /* below this threshold alpha/beta are 0 everywhere: nothing to filter */
6189     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6190        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
     /* intra MB: bS is constant (4 on MB edges, 3 inside; 3 on the top MB
      * edge in field pictures) so no per-block strength computation needed */
6193     if( IS_INTRA(mb_type) ) {
6194         int16_t bS4[4] = {4,4,4,4};
6195         int16_t bS3[4] = {3,3,3,3};
6196         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6197         if( IS_8x8DCT(mb_type) ) {
6198             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6199             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6200             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6201             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6203             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6204             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6205             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6206             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6207             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6208             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6209             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6210             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6212         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6213         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6214         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6215         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6216         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6217         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6218         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6219         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
     /* inter MB: compute bS per edge with the DSP strength function */
6222         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6223         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6225         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6227             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6229             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6230                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6231             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6232                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6234             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6235             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6236             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6237                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
     /* MB borders against an intra neighbour always use full strength */
6239         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6240             bSv[0][0] = 0x0004000400040004ULL;
6241         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6242             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6244 #define FILTER(hv,dir,edge)\
6245         if(bSv[dir][edge]) {\
6246             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6248                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6249                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6255         } else if( IS_8x8DCT(mb_type) ) {
/* Compute deblocking boundary strengths (bS) and apply the in-loop filter to
 * one macroblock along a single direction (dir==0: vertical edges, dir==1:
 * horizontal edges).
 * NOTE(review): this excerpt is elided -- several original lines (closing
 * braces, else branches, some declarations) are missing between the
 * residual-numbered lines; read structure with care. */
6275 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6276 MpegEncContext * const s = &h->s;
/* mbm_xy/mbm_type: the neighbouring macroblock on the far side of the first
 * edge in this direction (left neighbour for dir==0, top for dir==1). */
6278 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6279 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps per-list reference indices to frame numbers so that refs from
 * different slices can be compared; ref2frmm is the neighbour's table. */
6280 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6281 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* start==1 skips edge 0 when the neighbour is outside any slice (0xFFFF). */
6282 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB has only the outer edge to filter; otherwise 4 edges. */
6284 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6285 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6286 // how often to recheck mv-based bS when iterating between edges
6287 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6288 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6289 // how often to recheck mv-based bS when iterating along each edge
6290 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6292 if (first_vertical_edge_done) {
/* deblocking_filter==2 means "filter inside slices only": skip the first
 * edge when the neighbour belongs to a different slice (body elided). */
6296 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6299 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6300 && !IS_INTERLACED(mb_type)
6301 && IS_INTERLACED(mbm_type)
6303 // This is a special case in the norm where the filtering must
6304 // be done twice (one each of the field) even if we are in a
6305 // frame macroblock.
6307 static const int nnz_idx[4] = {4,5,6,3};
6308 unsigned int tmp_linesize = 2 * linesize;
6309 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6310 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter the top edge once against each field MB of the pair above. */
6315 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6316 if( IS_INTRA(mb_type) ||
6317 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6318 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6320 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6321 for( i = 0; i < 4; i++ ) {
6322 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6323 mbn_nnz[nnz_idx[i]] != 0 )
6329 // Do not use s->qscale as luma quantizer because it has not the same
6330 // value in IPCM macroblocks.
6331 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6332 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6333 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6334 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6335 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6336 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6337 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6338 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop over the (1 or 4) transform edges in this direction. */
6345 for( edge = start; edge < edges; edge++ ) {
6346 /* mbn_xy: neighbor macroblock */
6347 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6348 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6349 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* With the 8x8 transform the inner (odd) 4x4 edges are not filtered. */
6353 if( (edge&1) && IS_8x8DCT(mb_type) )
6356 if( IS_INTRA(mb_type) ||
6357 IS_INTRA(mbn_type) ) {
6360 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6361 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6370 bS[0] = bS[1] = bS[2] = bS[3] = value;
6375 if( edge & mask_edge ) {
6376 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6379 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6380 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6383 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6384 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6385 int bn_idx= b_idx - (dir ? 8:1);
/* v becomes nonzero if refs differ or any mv component differs by >= 1 luma
 * sample horizontally (4 in qpel) or >= mvy_limit vertically. */
6388 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6389 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6390 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6391 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare each list against the neighbour's other list. */
6394 if(h->slice_type_nos == FF_B_TYPE && v){
6396 for( l = 0; !v && l < 2; l++ ) {
6398 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6399 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6400 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6404 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Per-4x4 path: bS depends on non-zero coefficients first, then on
 * ref/mv differences (branch bodies partly elided in this excerpt). */
6410 for( i = 0; i < 4; i++ ) {
6411 int x = dir == 0 ? edge : i;
6412 int y = dir == 0 ? i : edge;
6413 int b_idx= 8 + 4 + x + 8*y;
6414 int bn_idx= b_idx - (dir ? 8:1);
6416 if( h->non_zero_count_cache[b_idx] |
6417 h->non_zero_count_cache[bn_idx] ) {
6423 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6424 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6425 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6426 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6432 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6434 for( l = 0; l < 2; l++ ) {
6436 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6437 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6438 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All-zero bS for this edge: nothing to filter, skip the DSP calls. */
6447 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6452 // Do not use s->qscale as luma quantizer because it has not the same
6453 // value in IPCM macroblocks.
6454 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6455 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6456 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6457 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical edges: chroma is filtered only on even edges (4:2:0 subsampling
 * halves the number of chroma edges). */
6459 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6460 if( (edge&1) == 0 ) {
6461 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6462 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6463 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6464 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal edges: same pattern with row offsets scaled by linesize. */
6467 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6468 if( (edge&1) == 0 ) {
6469 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6470 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6471 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6472 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Per-macroblock deblocking entry point: applies a cheap low-QP early-out,
 * patches non_zero_count_cache for the CAVLC+8x8dct case, handles the MBAFF
 * mixed-interlace first vertical edge specially, then filters both
 * directions via filter_mb_dir().
 * NOTE(review): this excerpt is elided -- some original lines are missing
 * between the residual-numbered lines. */
6478 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6479 MpegEncContext * const s = &h->s;
6480 const int mb_xy= mb_x + mb_y*s->mb_stride;
6481 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical mv threshold (field mvs). */
6482 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6483 int first_vertical_edge_done = 0;
6486 //for sufficiently low qp, filtering wouldn't do anything
6487 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6489 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6490 int qp = s->current_picture.qscale_table[mb_xy];
/* Early-out condition continues here: also require the averaged QP with the
 * left and top neighbours to stay under the threshold (head line elided). */
6492 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6493 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6498 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6499 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6500 int top_type, left_type[2];
6501 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6502 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6503 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the cached nnz flags for neighbours from their cbp bits. */
6505 if(IS_8x8DCT(top_type)){
6506 h->non_zero_count_cache[4+8*0]=
6507 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6508 h->non_zero_count_cache[6+8*0]=
6509 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6511 if(IS_8x8DCT(left_type[0])){
6512 h->non_zero_count_cache[3+8*1]=
6513 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6515 if(IS_8x8DCT(left_type[1])){
6516 h->non_zero_count_cache[3+8*3]=
6517 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* For the current MB, spread each of the four cbp luma bits over its 8x8. */
6520 if(IS_8x8DCT(mb_type)){
6521 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6522 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6524 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6525 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6527 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6528 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6530 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6531 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
/* MBAFF special case (head of condition elided): the left neighbour pair
 * has a different frame/field type, so the first vertical edge needs 8 bS
 * values and two QP pairs instead of the usual 4/1. */
6536 // left mb is in picture
6537 && h->slice_table[mb_xy-1] != 0xFFFF
6538 // and current and left pair do not have the same interlaced type
6539 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6540 // and left mb is in the same slice if deblocking_filter == 2
6541 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6542 /* First vertical edge is different in MBAFF frames
6543 * There are 8 different bS to compute and 2 different Qp
6545 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6546 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6551 int mb_qp, mbn0_qp, mbn1_qp;
6553 first_vertical_edge_done = 1;
6555 if( IS_INTRA(mb_type) )
6556 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6558 for( i = 0; i < 8; i++ ) {
6559 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6561 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6563 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6564 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6565 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6567 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average luma/chroma QP separately against each of the two left MBs. */
6574 mb_qp = s->current_picture.qscale_table[mb_xy];
6575 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6576 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6577 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6578 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6579 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6580 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6581 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6582 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6583 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6584 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6585 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6586 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6589 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6590 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6591 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6592 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6593 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Filter vertical (dir 0) then horizontal (dir 1) edges; the vertical pass
 * skips its first edge if the MBAFF special case above already did it. */
6597 for( dir = 0; dir < 2; dir++ )
6598 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6600 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6601 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode one slice: CABAC path, CAVLC path, or the (partly elided) simple
 * per-MB loop; reports decoded regions to the error resilience code via
 * ff_er_add_slice().
 * Fix: line "6767" contained garbled characters ("s->?gb" / "s->gb?.")
 * -- restored to the form used by the sibling checks on lines 6755/6768.
 * NOTE(review): this excerpt is elided -- some original lines are missing
 * between the residual-numbered lines. */
6605 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6606 H264Context *h = *(void**)arg;
6607 MpegEncContext * const s = &h->s;
/* With data partitioning only AC errors/ends are meaningful for ER. */
6608 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6612 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6613 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6615 if( h->pps.cabac ) {
/* CABAC path: byte-align, then initialise the arithmetic decoder on the
 * remaining slice payload. */
6619 align_get_bits( &s->gb );
6622 ff_init_cabac_states( &h->cabac);
6623 ff_init_cabac_decoder( &h->cabac,
6624 s->gb.buffer + get_bits_count(&s->gb)/8,
6625 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6626 /* calculate pre-state */
6627 for( i= 0; i < 460; i++ ) {
6629 if( h->slice_type_nos == FF_I_TYPE )
6630 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6632 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte as used by the CABAC engine. */
6635 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6637 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6642 int ret = decode_mb_cabac(h);
6644 //STOP_TIMER("decode_mb_cabac")
6646 if(ret>=0) hl_decode_mb(h);
6648 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6651 if(ret>=0) ret = decode_mb_cabac(h);
6653 if(ret>=0) hl_decode_mb(h);
6656 eos = get_cabac_terminate( &h->cabac );
/* Overread beyond bytestream_end+2 indicates bitstream corruption. */
6658 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6659 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6664 if( ++s->mb_x >= s->mb_width ) {
6666 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6668 if(FIELD_OR_MBAFF_PICTURE) {
6673 if( eos || s->mb_y >= s->mb_height ) {
6674 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6675 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path (enclosing else elided in this excerpt). */
6682 int ret = decode_mb_cavlc(h);
6684 if(ret>=0) hl_decode_mb(h);
6686 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6688 ret = decode_mb_cavlc(h);
6690 if(ret>=0) hl_decode_mb(h);
6695 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6696 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6701 if(++s->mb_x >= s->mb_width){
6703 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6705 if(FIELD_OR_MBAFF_PICTURE) {
6708 if(s->mb_y >= s->mb_height){
6709 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Landing exactly on the bitstream end is a clean slice end; anything else
 * is reported as an error region. */
6711 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6712 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6716 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6723 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6724 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6725 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6726 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Simple per-MB loop (this variant passes s->gb by value, matching the
 * checks below). */
6739 for(;s->mb_y < s->mb_height; s->mb_y++){
6740 for(;s->mb_x < s->mb_width; s->mb_x++){
6741 int ret= decode_mb(h);
6746 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6747 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6752 if(++s->mb_x >= s->mb_width){
6754 if(++s->mb_y >= s->mb_height){
6755 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6756 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6767 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6768 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6769 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6773 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6780 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6783 return -1; //not reached
/* Decode a picture timing SEI message: CPB/DPB delays (when HRD parameters
 * were signalled in the SPS) and pic_struct plus up to num_clock_ts clock
 * timestamps.
 * NOTE(review): this excerpt is elided -- some original lines (returns,
 * closing braces) are missing between the residual-numbered lines. */
6786 static int decode_picture_timing(H264Context *h){
6787 MpegEncContext * const s = &h->s;
6788 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6789 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6790 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6792 if(h->sps.pic_struct_present_flag){
6793 unsigned int i, num_clock_ts;
6794 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Reject pic_struct values beyond the defined table range. */
6796 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6799 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6801 for (i = 0 ; i < num_clock_ts ; i++){
6802 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6803 unsigned int full_timestamp_flag;
6804 skip_bits(&s->gb, 2); /* ct_type */
6805 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6806 skip_bits(&s->gb, 5); /* counting_type */
6807 full_timestamp_flag = get_bits(&s->gb, 1);
6808 skip_bits(&s->gb, 1); /* discontinuity_flag */
6809 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6810 skip_bits(&s->gb, 8); /* n_frames */
6811 if(full_timestamp_flag){
6812 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6813 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6814 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Without a full timestamp, each HH/MM/SS component is optional. */
6816 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6817 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6818 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6819 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6820 if(get_bits(&s->gb, 1)) /* hours_flag */
6821 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6825 if(h->sps.time_offset_length > 0)
6826 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Decode an unregistered user-data SEI: read up to sizeof(user_data)-1
 * bytes, sniff the x264 version banner after the 16-byte UUID to set
 * h->x264_build, and skip any remaining payload bytes.
 * NOTE(review): excerpt is elided -- NUL-termination and the trailing
 * skip-loop header are among the missing lines. */
6833 static int decode_unregistered_user_data(H264Context *h, int size){
6834 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of payload. */
6835 uint8_t user_data[16+256];
6841 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6842 user_data[i]= get_bits(&s->gb, 8);
/* Payload text starts after the 16-byte UUID; match the x264 banner. */
6846 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6847 if(e==1 && build>=0)
6848 h->x264_build= build;
6850 if(s->avctx->debug & FF_DEBUG_BUGS)
6851 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any payload bytes beyond what fit in the local buffer. */
6854 skip_bits(&s->gb, 8);
/* Decode an SEI NAL unit: iterate over (type, size) pairs -- each encoded
 * as a run of 0xFF bytes plus a final byte -- and dispatch picture timing
 * and unregistered user data; all other payload types are skipped.
 * NOTE(review): excerpt is elided -- switch header, case labels and
 * returns are among the missing lines. */
6859 static int decode_sei(H264Context *h){
6860 MpegEncContext * const s = &h->s;
/* +16 guards against reading a truncated (type,size) header. */
6862 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6867 type+= show_bits(&s->gb, 8);
6868 }while(get_bits(&s->gb, 8) == 255);
6872 size+= show_bits(&s->gb, 8);
6873 }while(get_bits(&s->gb, 8) == 255);
6876 case 1: // Picture timing SEI
6877 if(decode_picture_timing(h) < 0)
6881 if(decode_unregistered_user_data(h, size) < 0)
/* Unknown payload types: skip 'size' bytes. */
6885 skip_bits(&s->gb, 8*size);
6888 //FIXME check bits here
6889 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters from the VUI.
 * Most fields are read and discarded; only the delay lengths and
 * time_offset_length are stored in the SPS for later SEI parsing.
 * Returns <0 on invalid cpb_count (error return line elided here). */
6895 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6896 MpegEncContext * const s = &h->s;
6898 cpb_count = get_ue_golomb(&s->gb) + 1;
/* Spec limit: cpb_cnt_minus1 is 0..31. */
6900 if(cpb_count > 32U){
6901 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6905 get_bits(&s->gb, 4); /* bit_rate_scale */
6906 get_bits(&s->gb, 4); /* cpb_size_scale */
6907 for(i=0; i<cpb_count; i++){
6908 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6909 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6910 get_bits1(&s->gb); /* cbr_flag */
6912 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* Stored so decode_picture_timing() knows how many bits to skip. */
6913 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6914 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6915 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI (video usability information): sample aspect ratio, overscan,
 * video signal description, chroma location, timing info, NAL/VCL HRD
 * parameters, pic_struct flag and bitstream restrictions.
 * NOTE(review): excerpt is elided -- some returns/braces are missing. */
6919 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6920 MpegEncContext * const s = &h->s;
6921 int aspect_ratio_info_present_flag;
6922 unsigned int aspect_ratio_idc;
6924 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6926 if( aspect_ratio_info_present_flag ) {
6927 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit num/den pair; otherwise the idc
 * indexes the predefined pixel_aspect table. */
6928 if( aspect_ratio_idc == EXTENDED_SAR ) {
6929 sps->sar.num= get_bits(&s->gb, 16);
6930 sps->sar.den= get_bits(&s->gb, 16);
6931 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6932 sps->sar= pixel_aspect[aspect_ratio_idc];
6934 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6941 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6943 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6944 get_bits1(&s->gb); /* overscan_appropriate_flag */
6947 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6948 get_bits(&s->gb, 3); /* video_format */
6949 get_bits1(&s->gb); /* video_full_range_flag */
6950 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6951 get_bits(&s->gb, 8); /* colour_primaries */
6952 get_bits(&s->gb, 8); /* transfer_characteristics */
6953 get_bits(&s->gb, 8); /* matrix_coefficients */
6957 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6958 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6959 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6962 sps->timing_info_present_flag = get_bits1(&s->gb);
6963 if(sps->timing_info_present_flag){
6964 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6965 sps->time_scale = get_bits_long(&s->gb, 32);
6966 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* NAL and VCL HRD parameter sets share the same syntax. */
6969 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6970 if(sps->nal_hrd_parameters_present_flag)
6971 if(decode_hrd_parameters(h, sps) < 0)
6973 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6974 if(sps->vcl_hrd_parameters_present_flag)
6975 if(decode_hrd_parameters(h, sps) < 0)
6977 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6978 get_bits1(&s->gb); /* low_delay_hrd_flag */
6979 sps->pic_struct_present_flag = get_bits1(&s->gb);
6981 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6982 if(sps->bitstream_restriction_flag){
6983 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6984 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6985 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6986 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6987 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6988 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6989 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity bound; values above 16 would overflow the delayed-pic buffers. */
6991 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6992 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one quantisation scaling list of 16 or 64 entries, delta-coded in
 * zigzag order. An absent list uses fallback_list; a first delta yielding
 * next==0 selects the JVT default list (jvt_list).
 * NOTE(review): excerpt is elided -- the else branch, the if(next) guard
 * and the break/closing braces are among the missing lines. */
7000 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7001 const uint8_t *jvt_list, const uint8_t *fallback_list){
7002 MpegEncContext * const s = &h->s;
7003 int i, last = 8, next = 8;
7004 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7005 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7006 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7008 for(i=0;i<size;i++){
/* Each entry is the previous value plus a signed Golomb delta, mod 256. */
7010 next = (last + get_se_golomb(&s->gb)) & 0xff;
7011 if(!i && !next){ /* matrix not written, we use the preset one */
7012 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 after the first entry means "repeat the last value". */
7015 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS (is_sps) or PPS.
 * Fallback order per spec: previous list in the same set, then the SPS
 * lists (for a PPS when the SPS carried matrices), then the defaults. */
7019 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7020 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7021 MpegEncContext * const s = &h->s;
/* A PPS falls back to SPS lists only if the SPS actually carried any. */
7022 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7023 const uint8_t *fallback[4] = {
7024 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7025 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7026 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7027 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag gates the whole set. */
7029 if(get_bits1(&s->gb)){
7030 sps->scaling_matrix_present |= is_sps;
7031 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7032 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7033 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7034 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7035 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7036 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists are only present for SPS or when the PPS enables 8x8 DCT. */
7037 if(is_sps || pps->transform_8x8_mode){
7038 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7039 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL unit into a freshly allocated SPS and
 * store it in h->sps_buffers[sps_id], replacing any previous entry.
 * NOTE(review): excerpt is elided -- error returns, some braces and a few
 * assignments are missing between the residual-numbered lines. */
7044 static inline int decode_seq_parameter_set(H264Context *h){
7045 MpegEncContext * const s = &h->s;
7046 int profile_idc, level_idc;
7047 unsigned int sps_id;
7051 profile_idc= get_bits(&s->gb, 8);
7052 get_bits1(&s->gb); //constraint_set0_flag
7053 get_bits1(&s->gb); //constraint_set1_flag
7054 get_bits1(&s->gb); //constraint_set2_flag
7055 get_bits1(&s->gb); //constraint_set3_flag
7056 get_bits(&s->gb, 4); // reserved
7057 level_idc= get_bits(&s->gb, 8);
7058 sps_id= get_ue_golomb(&s->gb);
7060 if(sps_id >= MAX_SPS_COUNT) {
7061 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7064 sps= av_mallocz(sizeof(SPS));
7068 sps->profile_idc= profile_idc;
7069 sps->level_idc= level_idc;
/* Flat default scaling matrices (all 16) until/unless the SPS carries any. */
7071 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7072 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7073 sps->scaling_matrix_present = 0;
7075 if(sps->profile_idc >= 100){ //high profile
7076 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7077 if(sps->chroma_format_idc == 3)
7078 get_bits1(&s->gb); //residual_color_transform_flag
7079 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7080 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7081 sps->transform_bypass = get_bits1(&s->gb);
7082 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7084 sps->chroma_format_idc= 1;
7087 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7088 sps->poc_type= get_ue_golomb(&s->gb);
7090 if(sps->poc_type == 0){ //FIXME #define
7091 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7092 } else if(sps->poc_type == 1){//FIXME #define
7093 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7094 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7095 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7096 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7098 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7099 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7103 for(i=0; i<sps->poc_cycle_length; i++)
7104 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7105 }else if(sps->poc_type != 2){
7106 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7110 sps->ref_frame_count= get_ue_golomb(&s->gb);
7111 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7112 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7115 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7116 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7117 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7118 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7119 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7120 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7124 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7125 if(!sps->frame_mbs_only_flag)
7126 sps->mb_aff= get_bits1(&s->gb);
7130 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7132 #ifndef ALLOW_INTERLACE
7134 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7136 sps->crop= get_bits1(&s->gb);
7138 sps->crop_left = get_ue_golomb(&s->gb);
7139 sps->crop_right = get_ue_golomb(&s->gb);
7140 sps->crop_top = get_ue_golomb(&s->gb);
7141 sps->crop_bottom= get_ue_golomb(&s->gb);
7142 if(sps->crop_left || sps->crop_top){
7143 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7145 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7146 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7152 sps->crop_bottom= 0;
7155 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7156 if( sps->vui_parameters_present_flag )
7157 decode_vui_parameters(h, sps);
7159 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7160 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7161 sps_id, sps->profile_idc, sps->level_idc,
7163 sps->ref_frame_count,
7164 sps->mb_width, sps->mb_height,
7165 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7166 sps->direct_8x8_inference_flag ? "8B8" : "",
7167 sps->crop_left, sps->crop_right,
7168 sps->crop_top, sps->crop_bottom,
7169 sps->vui_parameters_present_flag ? "VUI" : "",
7170 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7173 av_free(h->sps_buffers[sps_id]);
7174 h->sps_buffers[sps_id]= sps;
/* Precompute the luma-QP -> chroma-QP lookup table for chroma plane t,
 * applying the PPS chroma_qp_index_offset and clipping to 0..51.
 * NOTE(review): the storage-class/return-type line preceding this one is
 * missing from this elided excerpt. */
7182 build_qp_table(PPS *pps, int t, int index)
7185 for(i = 0; i < 52; i++)
7186 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL unit into a freshly allocated PPS and
 * store it in h->pps_buffers[pps_id], replacing any previous entry.
 * NOTE(review): excerpt is elided -- error returns and braces are missing;
 * the pipe-delimited lines below are remnants of the FMO syntax table
 * comment from the original source and must stay untouched. */
7189 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7190 MpegEncContext * const s = &h->s;
7191 unsigned int pps_id= get_ue_golomb(&s->gb);
7194 if(pps_id >= MAX_PPS_COUNT) {
7195 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7199 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been decoded. */
7202 pps->sps_id= get_ue_golomb(&s->gb);
7203 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7204 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7208 pps->cabac= get_bits1(&s->gb);
7209 pps->pic_order_present= get_bits1(&s->gb);
7210 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* FMO (slice groups) is parsed for diagnostics only -- not supported. */
7211 if(pps->slice_group_count > 1 ){
7212 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7213 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7214 switch(pps->mb_slice_group_map_type){
7217 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7218 | run_length[ i ] |1 |ue(v) |
7223 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7225 | top_left_mb[ i ] |1 |ue(v) |
7226 | bottom_right_mb[ i ] |1 |ue(v) |
7234 | slice_group_change_direction_flag |1 |u(1) |
7235 | slice_group_change_rate_minus1 |1 |ue(v) |
7240 | slice_group_id_cnt_minus1 |1 |ue(v) |
7241 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7243 | slice_group_id[ i ] |1 |u(v) |
7248 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7249 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7250 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7251 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7255 pps->weighted_pred= get_bits1(&s->gb);
7256 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7257 pps->init_qp= get_se_golomb(&s->gb) + 26;
7258 pps->init_qs= get_se_golomb(&s->gb) + 26;
7259 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7260 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7261 pps->constrained_intra_pred= get_bits1(&s->gb);
7262 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7264 pps->transform_8x8_mode= 0;
7265 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7266 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7267 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional RBSP extension: 8x8 transform flag, PPS scaling matrices and
 * the second chroma QP offset are only present if bits remain. */
7269 if(get_bits_count(&s->gb) < bit_length){
7270 pps->transform_8x8_mode= get_bits1(&s->gb);
7271 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7272 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7274 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7277 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7278 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7279 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7280 h->pps.chroma_qp_diff= 1;
7282 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7283 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7284 pps_id, pps->sps_id,
7285 pps->cabac ? "CABAC" : "CAVLC",
7286 pps->slice_group_count,
7287 pps->ref_count[0], pps->ref_count[1],
7288 pps->weighted_pred ? "weighted" : "",
7289 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7290 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7291 pps->constrained_intra_pred ? "CONSTR" : "",
7292 pps->redundant_pic_cnt_present ? "REDU" : "",
7293 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7297 av_free(h->pps_buffers[pps_id]);
7298 h->pps_buffers[pps_id]= pps;
7306 * Call decode_slice() for each context.
7308 * @param h h264 master context
7309 * @param context_count number of contexts to execute
7311 static void execute_decode_slices(H264Context *h, int context_count){
7312 MpegEncContext * const s = &h->s;
7313 AVCodecContext * const avctx= s->avctx;
/* Fast path: a single context decodes directly on the master context,
 * with no thread dispatch overhead. */
7317 if(context_count == 1) {
7318 decode_slice(avctx, &h);
/* Multi-context path: propagate the caller's error-recognition setting
 * into every thread context and reset its per-slice error counter. */
7320 for(i = 1; i < context_count; i++) {
7321 hx = h->thread_context[i];
7322 hx->s.error_recognition = avctx->error_recognition;
7323 hx->s.error_count = 0;
/* Run decode_slice() over all thread contexts via the avctx->execute
 * callback (frame/slice threading dispatch). */
7326 avctx->execute(avctx, (void *)decode_slice,
7327 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7329 /* pull back stuff from slices to master context */
7330 hx = h->thread_context[context_count - 1];
7331 s->mb_x = hx->s.mb_x;
7332 s->mb_y = hx->s.mb_y;
7333 s->dropable = hx->s.dropable;
7334 s->picture_structure = hx->s.picture_structure;
/* Sum error counts from every worker context into the master so error
 * concealment statistics cover the whole frame. */
7335 for(i = 1; i < context_count; i++)
7336 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed) and Annex-B (start-code) framing,
 * batching slice NALs across thread contexts before executing them.
 * @param h        master H264 context
 * @param buf      input bitstream
 * @param buf_size size of buf in bytes
 */
7341 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7342 MpegEncContext * const s = &h->s;
7343 AVCodecContext * const avctx= s->avctx;
7345 H264Context *hx; ///< thread context
7346 int context_count = 0;
7348 h->max_contexts = avctx->thread_count;
/* NOTE(review): this hexdump of the first 50 bytes looks like leftover
 * debug output (logs at ERROR level with a NULL context) — confirm it is
 * guarded by a debug #if in the full file. */
7351 for(i=0; i<50; i++){
7352 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Unless the caller feeds partial frames (CODEC_FLAG2_CHUNKS), start a
 * fresh frame: reset slice counter and drop the picture pointer when not
 * waiting for a second field. */
7355 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7356 h->current_slice = 0;
7357 if (!s->first_field)
7358 s->current_picture_ptr= NULL;
7370 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL length prefix
 * (h->nal_length_size bytes) and bounds-check it. */
7372 for(i = 0; i < h->nal_length_size; i++)
7373 nalsize = (nalsize << 8) | buf[buf_index++];
7374 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7379 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7384 // start code prefix search
7385 for(; buf_index + 3 < buf_size; buf_index++){
7386 // This should always succeed in the first iteration.
7387 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7391 if(buf_index+3 >= buf_size) break;
/* Slices are parsed into the next free thread context so that several
 * can later be decoded in parallel by execute_decode_slices(). */
7396 hx = h->thread_context[context_count];
/* Unescape the NAL (remove emulation-prevention bytes); ptr/dst_length
 * describe the resulting RBSP. */
7398 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7399 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then trim the rbsp_stop_one_bit to get the
 * exact payload size in bits. */
7402 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7404 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7406 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7407 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7410 if (h->is_avc && (nalsize != consumed)){
7411 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7415 buf_index += consumed;
/* Skip non-reference NALs when the caller requested frame dropping. */
7417 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7418 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7423 switch(hx->nal_unit_type){
7425 if (h->nal_unit_type != NAL_IDR_SLICE) {
7426 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7429 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice: single bitstream reader feeds both
 * intra and inter data. */
7431 init_get_bits(&hx->s.gb, ptr, bit_length);
7433 hx->inter_gb_ptr= &hx->s.gb;
7434 hx->s.data_partitioning = 0;
7436 if((err = decode_slice_header(hx, h)))
7439 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Decode the slice only if it survives all skip/hurry-up filters. */
7440 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7441 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7442 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7443 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7444 && avctx->skip_frame < AVDISCARD_ALL)
/* Data-partitioned slice, partition A: carries the slice header; B and C
 * readers are attached by the following cases. */
7448 init_get_bits(&hx->s.gb, ptr, bit_length);
7450 hx->inter_gb_ptr= NULL;
7451 hx->s.data_partitioning = 1;
7453 err = decode_slice_header(hx, h);
/* Partition B: intra-coded residual data. */
7456 init_get_bits(&hx->intra_gb, ptr, bit_length);
7457 hx->intra_gb_ptr= &hx->intra_gb;
/* Partition C: inter-coded residual data; once present the partitioned
 * slice can be decoded (subject to the same skip filters as above). */
7460 init_get_bits(&hx->inter_gb, ptr, bit_length);
7461 hx->inter_gb_ptr= &hx->inter_gb;
7463 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7464 && s->context_initialized
7466 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7467 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7468 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7469 && avctx->skip_frame < AVDISCARD_ALL)
7473 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set: may enable low-delay output, which limits the
 * reorder buffer advertised via has_b_frames. */
7477 init_get_bits(&s->gb, ptr, bit_length);
7478 decode_seq_parameter_set(h);
7480 if(s->flags& CODEC_FLAG_LOW_DELAY)
7483 if(avctx->has_b_frames < 2)
7484 avctx->has_b_frames= !s->low_delay;
7487 init_get_bits(&s->gb, ptr, bit_length);
7489 decode_picture_parameter_set(h, bit_length);
7493 case NAL_END_SEQUENCE:
7494 case NAL_END_STREAM:
7495 case NAL_FILLER_DATA:
7497 case NAL_AUXILIARY_SLICE:
7500 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Once every thread context holds a parsed slice, decode the batch. */
7503 if(context_count == h->max_contexts) {
7504 execute_decode_slices(h, context_count);
7509 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7511 /* Slice could not be decoded in parallel mode, copy down
7512 * NAL unit stuff to context 0 and restart. Note that
7513 * rbsp_buffer is not transferred, but since we no longer
7514 * run in parallel mode this should not be an issue. */
7515 h->nal_unit_type = hx->nal_unit_type;
7516 h->nal_ref_idc = hx->nal_ref_idc;
/* Flush any slices still pending in thread contexts. */
7522 execute_decode_slices(h, context_count);
7527 * returns the number of bytes consumed for building the current frame
7529 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7530 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
/* If we stopped within 10 bytes of the end, report the whole buffer as
 * consumed so trailing padding does not get re-fed to the decoder. */
7531 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode callback: consume one access unit from buf and, when a
 * picture becomes available after reordering, return it in *data.
 * @param avctx     codec context
 * @param data      output AVFrame (written when *data_size is set)
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @param buf       input packet data (buf_size==0 flushes delayed pictures)
 * @return number of bytes consumed, or negative on error
 */
7536 static int decode_frame(AVCodecContext *avctx,
7537 void *data, int *data_size,
7538 const uint8_t *buf, int buf_size)
7540 H264Context *h = avctx->priv_data;
7541 MpegEncContext *s = &h->s;
7542 AVFrame *pict = data;
7545 s->flags= avctx->flags;
7546 s->flags2= avctx->flags2;
7548 /* end of stream, output what is still in the buffers */
7549 if (buf_size == 0) {
7553 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC, stopping at the first
 * keyframe/POC-0 boundary (an IDR resets the output order). */
7554 out = h->delayed_pic[0];
7556 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7557 if(h->delayed_pic[i]->poc < out->poc){
7558 out = h->delayed_pic[i];
/* Remove the chosen picture from the delayed list by shifting down. */
7562 for(i=out_idx; h->delayed_pic[i]; i++)
7563 h->delayed_pic[i] = h->delayed_pic[i+1];
7566 *data_size = sizeof(AVFrame);
7567 *pict= *(AVFrame*)out;
/* First call with avcC-style extradata: parse the avcC box once to pull
 * out the SPS/PPS and the NAL length field size. */
7573 if(h->is_avc && !h->got_avcC) {
7574 int i, cnt, nalsize;
7575 unsigned char *p = avctx->extradata;
7576 if(avctx->extradata_size < 7) {
7577 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7581 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7584 /* sps and pps in the avcC always have length coded with 2 bytes,
7585 so put a fake nal_length_size = 2 while parsing them */
7586 h->nal_length_size = 2;
7587 // Decode sps from avcC
7588 cnt = *(p+5) & 0x1f; // Number of sps
7590 for (i = 0; i < cnt; i++) {
7591 nalsize = AV_RB16(p) + 2;
7592 if(decode_nal_units(h, p, nalsize) < 0) {
7593 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7598 // Decode pps from avcC
7599 cnt = *(p++); // Number of pps
7600 for (i = 0; i < cnt; i++) {
7601 nalsize = AV_RB16(p) + 2;
7602 if(decode_nal_units(h, p, nalsize) != nalsize) {
7603 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7608 // Now store right nal length size, that will be use to parse all other nals
7609 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7610 // Do not reparse avcC
/* Annex-B extradata (non-avcC): decode it once as ordinary NAL units. */
7614 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7615 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7620 buf_index=decode_nal_units(h, buf, buf_size);
7624 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7625 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7626 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A full frame has been decoded (or CHUNKS mode finished a field/frame):
 * finalize it and run the output-reordering logic below. */
7630 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7631 Picture *out = s->current_picture_ptr;
7632 Picture *cur = s->current_picture_ptr;
7633 int i, pics, cross_idr, out_of_order, out_idx;
7637 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7638 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control operations and roll the POC state
 * forward for the next picture. */
7641 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7642 h->prev_poc_msb= h->poc_msb;
7643 h->prev_poc_lsb= h->poc_lsb;
7645 h->prev_frame_num_offset= h->frame_num_offset;
7646 h->prev_frame_num= h->frame_num;
7649 * FIXME: Error handling code does not seem to support interlaced
7650 * when slices span multiple rows
7651 * The ff_er_add_slice calls don't work right for bottom
7652 * fields; they cause massive erroneous error concealing
7653 * Error marking covers both fields (top and bottom).
7654 * This causes a mismatched s->error_count
7655 * and a bad error table. Further, the error count goes to
7656 * INT_MAX when called for bottom field, because mb_y is
7657 * past end by one (callers fault) and resync_mb_y != 0
7658 * causes problems for the first MB line, too.
7665 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7666 /* Wait for second field. */
7670 cur->repeat_pict = 0;
7672 /* Signal interlacing information externally. */
7673 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7674 if(h->sps.pic_struct_present_flag){
7675 switch (h->sei_pic_struct)
7677 case SEI_PIC_STRUCT_FRAME:
7678 cur->interlaced_frame = 0;
7680 case SEI_PIC_STRUCT_TOP_FIELD:
7681 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7682 case SEI_PIC_STRUCT_TOP_BOTTOM:
7683 case SEI_PIC_STRUCT_BOTTOM_TOP:
7684 cur->interlaced_frame = 1;
7686 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7687 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7688 // Signal the possibility of telecined film externally (pic_struct 5,6)
7689 // From these hints, let the applications decide if they apply deinterlacing.
7690 cur->repeat_pict = 1;
7691 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7693 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7694 // Force progressive here, as doubling interlaced frame is a bad idea.
7695 cur->interlaced_frame = 0;
7696 cur->repeat_pict = 2;
7698 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7699 cur->interlaced_frame = 0;
7700 cur->repeat_pict = 4;
7704 /* Derive interlacing flag from used decoding process. */
7705 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7708 if (cur->field_poc[0] != cur->field_poc[1]){
7709 /* Derive top_field_first from field pocs. */
7710 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7712 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7713 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7714 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7715 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7716 cur->top_field_first = 1;
7718 cur->top_field_first = 0;
7720 /* Most likely progressive */
7721 cur->top_field_first = 0;
7725 //FIXME do something with unavailable reference frames
7727 /* Sort B-frames into display order */
7729 if(h->sps.bitstream_restriction_flag
7730 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7731 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without bitstream restrictions the worst case must be assumed when the
 * caller demands strict compliance. */
7735 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7736 && !h->sps.bitstream_restriction_flag){
7737 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7742 while(h->delayed_pic[pics]) pics++;
7744 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Append the new picture to the delayed list; keep it referenced so it
 * is not recycled before it is output. */
7746 h->delayed_pic[pics++] = cur;
7747 if(cur->reference == 0)
7748 cur->reference = DELAYED_PIC_REF;
/* Same minimum-POC selection as the flush path above. */
7750 out = h->delayed_pic[0];
7752 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7753 if(h->delayed_pic[i]->poc < out->poc){
7754 out = h->delayed_pic[i];
7757 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7759 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow the advertised reorder delay when pictures come out of order and
 * the stream did not declare its reorder depth. */
7761 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7763 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7765 ((!cross_idr && out->poc > h->outputed_poc + 2)
7766 || cur->pict_type == FF_B_TYPE)))
7769 s->avctx->has_b_frames++;
/* Output a picture when the buffer is full (or is forced out while
 * out-of-order); drop it from the delayed list either way. */
7772 if(out_of_order || pics > s->avctx->has_b_frames){
7773 out->reference &= ~DELAYED_PIC_REF;
7774 for(i=out_idx; h->delayed_pic[i]; i++)
7775 h->delayed_pic[i] = h->delayed_pic[i+1];
7777 if(!out_of_order && pics > s->avctx->has_b_frames){
7778 *data_size = sizeof(AVFrame);
7780 h->outputed_poc = out->poc;
7781 *pict= *(AVFrame*)out;
7783 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7788 assert(pict->data[0] || !*data_size);
7789 ff_print_debug_info(s, pict);
7790 //printf("out %d\n", (int)pict->data[0]);
7793 /* Return the Picture timestamp as the frame number */
7794 /* we subtract 1 because it is added on utils.c */
7795 avctx->frame_number = s->picture_number - 1;
7797 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the macroblocks neighboring
 * the current one. A neighbor is available when it lies inside the picture
 * and belongs to the same slice (slice_table entry matches slice_num).
 * Index layout per the code below: 0=top-left, 1=top, 2=top-right,
 * 3=left; 4 and 5 are hard-coded (see FIXMEs).
 */
7800 static inline void fill_mb_avail(H264Context *h){
7801 MpegEncContext * const s = &h->s;
7802 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Top row of neighbors; top-left also requires mb_x>0 and top-right
 * requires mb_x+1 to stay inside the row. */
7805 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7806 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7807 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7813 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7814 h->mb_avail[4]= 1; //FIXME move out
7815 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): the lines below are the body of the built-in self-test
 * (presumably compiled only under #ifdef TEST — the function header is not
 * visible here; confirm against the full file). It exercises exp-Golomb
 * coding round-trips, the 4x4 (I)DCT, the quantizer, and NAL
 * escaping/unescaping. */
7823 #define SIZE (COUNT*40)
7829 // int int_temp[10000];
7831 AVCodecContext avctx;
7833 dsputil_init(&dsp, &avctx);
/* Round-trip test: write COUNT unsigned exp-Golomb codes... */
7835 init_put_bits(&pb, temp, SIZE);
7836 printf("testing unsigned exp golomb\n");
7837 for(i=0; i<COUNT; i++){
7839 set_ue_golomb(&pb, i);
7840 STOP_TIMER("set_ue_golomb");
7842 flush_put_bits(&pb);
/* ...then read them back and verify each value matches its index. */
7844 init_get_bits(&gb, temp, 8*SIZE);
7845 for(i=0; i<COUNT; i++){
7848 s= show_bits(&gb, 24);
7851 j= get_ue_golomb(&gb);
7853 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7856 STOP_TIMER("get_ue_golomb");
/* Same round-trip for signed exp-Golomb, centered around zero. */
7860 init_put_bits(&pb, temp, SIZE);
7861 printf("testing signed exp golomb\n");
7862 for(i=0; i<COUNT; i++){
7864 set_se_golomb(&pb, i - COUNT/2);
7865 STOP_TIMER("set_se_golomb");
7867 flush_put_bits(&pb);
7869 init_get_bits(&gb, temp, 8*SIZE);
7870 for(i=0; i<COUNT; i++){
7873 s= show_bits(&gb, 24);
7876 j= get_se_golomb(&gb);
7877 if(j != i - COUNT/2){
7878 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7881 STOP_TIMER("get_se_golomb");
/* Forward DCT + scaling + IDCT round-trip on random blocks; accumulates
 * total and maximum reconstruction error. */
7885 printf("testing 4x4 (I)DCT\n");
7888 uint8_t src[16], ref[16];
7889 uint64_t error= 0, max_error=0;
7891 for(i=0; i<COUNT; i++){
7893 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7894 for(j=0; j<16; j++){
7895 ref[j]= random()%255;
7896 src[j]= random()%255;
7899 h264_diff_dct_c(block, src, ref, 4);
7902 for(j=0; j<16; j++){
7903 // printf("%d ", block[j]);
7904 block[j]= block[j]*4;
7905 if(j&1) block[j]= (block[j]*4 + 2)/5;
7906 if(j&4) block[j]= (block[j]*4 + 2)/5;
7910 s->dsp.h264_idct_add(ref, block, 4);
7911 /* for(j=0; j<16; j++){
7912 printf("%d ", ref[j]);
7916 for(j=0; j<16; j++){
7917 int diff= FFABS(src[j] - ref[j]);
7920 max_error= FFMAX(max_error, diff);
7923 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7924 printf("testing quantizer\n");
7925 for(qp=0; qp<52; qp++){
7927 src1_block[i]= src2_block[i]= random()%255;
/* NAL layer test: encode a random bitstream with emulation-prevention
 * bytes, decode it back, and check length, consumed bytes, and content. */
7930 printf("Testing NAL layer\n");
7932 uint8_t bitstream[COUNT];
7933 uint8_t nal[COUNT*2];
7935 memset(&h, 0, sizeof(H264Context));
7937 for(i=0; i<COUNT; i++){
7945 for(j=0; j<COUNT; j++){
7946 bitstream[j]= (random() % 255) + 1;
/* Sprinkle zero bytes into the bitstream to exercise escaping. */
7949 for(j=0; j<zeros; j++){
7950 int pos= random() % COUNT;
7951 while(bitstream[pos] == 0){
7960 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7962 printf("encoding failed\n");
7966 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7970 if(out_length != COUNT){
7971 printf("incorrect length %d %d\n", out_length, COUNT);
7975 if(consumed != nal_length){
7976 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7980 if(memcmp(bitstream, out, COUNT)){
7981 printf("mismatch\n");
7987 printf("Testing RBSP\n");
/**
 * Codec close callback: release all decoder-owned allocations.
 * Frees the NAL unescaping buffers, the per-frame tables, and every
 * cached SPS/PPS.
 */
7995 static av_cold int decode_end(AVCodecContext *avctx)
7997 H264Context *h = avctx->priv_data;
7998 MpegEncContext *s = &h->s;
8001 av_freep(&h->rbsp_buffer[0]);
8002 av_freep(&h->rbsp_buffer[1]);
8003 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep() on &array[i] frees each cached parameter set and NULLs the
 * slot, so a double close is harmless. */
8005 for(i = 0; i < MAX_SPS_COUNT; i++)
8006 av_freep(h->sps_buffers + i);
8008 for(i = 0; i < MAX_PPS_COUNT; i++)
8009 av_freep(h->pps_buffers + i);
8013 // memset(h, 0, sizeof(H264Context));
/* Public codec registration entry for the H.264 decoder. DR1 lets callers
 * supply their own frame buffers; DELAY reflects the reorder buffer
 * (pictures are output later than they are decoded). */
8019 AVCodec h264_decoder = {
8023 sizeof(H264Context),
8028 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8030 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),