2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/* NOTE(review): this listing is a numbered dump with lines elided; the
 * surrounding doc-comment delimiters are not visible here. */
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC coeff_token tables: one VLC per NC context class (0-1, 2-3, 4-7, >=8).
 * Table sizes below are per-class and must match init-time allocation. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token VLC for the 2x2 chroma DC block (separate code table in the spec). */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs: one table per TotalCoeff value 1..15 for luma. */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros VLCs for chroma DC (TotalCoeff 1..3). */
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs: one per zerosLeft value 1..6; runs of 7+ use run7 below. */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations: SVQ3 shares this decoder's MB layer, and the
 * loop-filter entry points are defined later in the file. */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/**
 * qp % 6 lookup for every legal H.264 luma QP (0..51); paired with div6[]
 * to split a quantizer into its scale index and shift without a division.
 * NOTE(review): terminating "};" restored — it was elided in the dump.
 */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * qp / 6 lookup for every legal H.264 luma QP (0..51); see rem6[] above.
 * NOTE(review): terminating "};" restored — it was elided in the dump.
 */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Per-mode index tables selecting which sub-blocks of the left neighbour
 * macroblock feed the caches in fill_caches(): row 0 is the plain
 * frame/frame case, rows 1-3 the MBAFF frame/field mismatch cases.
 * NOTE(review): the dump preserved only the declarator line; the row
 * contents below are reconstructed from the reference implementation —
 * verify against the upstream file before relying on exact values.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,0,3,1,8,11,7,10},
    {0,2,0,2,7,10,7,10},
    {0,2,0,2,4,4,4,4}
};
/**
 * Loads everything the decode/deblock of one macroblock needs about its
 * neighbours (types, intra pred modes, non-zero counts, CBP, mv/ref/mvd,
 * direct flags) into the per-MB caches on H264Context.
 * NOTE(review): this listing is a numbered dump with many interior lines
 * elided (conditional bodies, braces); comments below annotate only what
 * the visible lines establish.
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
/* Deblock fast path: same slice as the top neighbour and no MBAFF
 * (the taken branch is not visible in this dump). */
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (progressive) neighbour addresses. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: recompute neighbours per macroblock *pair*; the enclosing
 * conditional is not visible in this dump. */
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
164 left_block = left_block_options[1];
166 left_block= left_block_options[2];
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
/* Publish resolved neighbour addresses for later users (deblock, CABAC). */
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblock path: a neighbour counts if its slice_table entry is valid
 * (< 0xFFFF), i.e. it may belong to a different slice. */
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter MBs: undo the ref_cache doubling done for MC so the loop
 * filter sees the stored reference indices again. */
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: a neighbour counts only if it is in the *same* slice. */
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: compute per-4x4-block sample availability bitmasks.
 * With constrained_intra_pred only intra neighbours count (type_mask). */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: import the neighbouring 4x4 prediction modes into the cache;
 * unavailable neighbours become DC (value set on the elided lines). */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
260 if(!(top_type & type_mask))
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
276 if(!(left_type[i] & type_mask))
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: top row and left column from the neighbours;
 * missing neighbours get 0 (CABAC inter) or 64 (treated as "all set"). */
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top/left neighbours, needed by CABAC context derivation. */
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct: fill mv_cache and ref_cache from the four neighbours;
 * unavailable parts get 0 mv and LIST_NOT_USED/PART_NOT_AVAILABLE refs. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Topleft/topright are only needed by spatial prediction, not deblock. */
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: mirror the neighbours' motion-vector differences. */
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: per-8x8 direct-mode flags from the neighbours. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: rescale cached mv/ref of neighbours whose field/frame coding
 * differs from the current MB (MAP_F2F applied over MAP_MVS below). */
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used by CABAC transform-size context. */
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/* NOTE(review): listing truncated — loop headers, error-return branches and
 * the function tail are elided; comments annotate only visible lines. */
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
/* Remap tables: index = requested 4x4 mode; -1 = mode needs the missing
 * neighbour and is an error, 0 = fine as-is, else = substitute DC mode. */
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Bit 0x8000 = top-left 4x4 block's top samples; if unset, remap the
 * whole top row of cached modes. */
576 if(!(h->top_samples_available&0x8000)){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left availability is tracked per 4x4 row via these four mask bits. */
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
604 } //FIXME cleanup like next
/* NOTE(review): listing truncated — the range check, returns and closing
 * braces are elided; comments annotate only visible lines. */
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
/* Remap tables for 16x16/chroma modes: -1 = error, else substitute DC
 * variant that only uses the available edge. */
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
619 if(!(h->top_samples_available&0x8000)){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* 0x8080: availability of the two left halves (top/bottom 8 rows). */
627 if((h->left_samples_available&0x8080) != 0x8080){
628 /* only one of the two left halves is available (MBAFF +
629 * constrained_intra_pred): use the special half-edge DC modes. */
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Returns the reference index of the diagonal (top-right, falling back to
 * top-left) neighbour used by median MV prediction, and points *C at its
 * motion vector. Handles the MBAFF frame/field crossing cases in place.
 * NOTE(review): listing truncated — several #if/brace/branch lines are
 * elided; comments annotate only visible lines. */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scan8[0]-2 is a scratch cache slot used to hold the rescaled MV. */
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: fetch mv/ref at 4x4 position (X4,Y4) in the current
 * picture, scale the vertical component by MV_OP and the reference by
 * REF_OP to convert between frame and field units, stash the MV in the
 * scratch slot and return the scaled reference. */
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* Fall back to the topleft neighbour when topright is unavailable. */
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / remaining cases: use topright if present, else topleft. */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
/* NOTE(review): listing truncated — the single-match branches and final
 * braces are elided; comments annotate only visible lines. */
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
/* A = left neighbour, B = top neighbour, C = diagonal (from
 * fetch_diagonal_mv) — the three median-prediction candidates. */
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbours use the same reference as this partition. */
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
/* >1 matches (or 0): component-wise median of A, B, C. Exactly one
 * match: take that neighbour's MV directly (elided branches). */
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
786 }else if(top_ref==ref){
/* Only the left neighbour exists: use A (elided); otherwise median. */
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/* NOTE(review): listing truncated — the shortcut returns and the function
 * tail are elided; comments annotate only visible lines. */
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top 16x8 partition: if the top neighbour uses the same ref, copy its
 * MV directly (spec shortcut); the copy/return lines are elided. */
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom 16x8 partition: same shortcut against the left neighbour. */
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* No shortcut applied: fall back to the generic median prediction. */
838 pred_motion(h, n, 4, list, ref, mx, my);
/* NOTE(review): listing truncated — the shortcut returns and the function
 * tail are elided; comments annotate only visible lines. */
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left 8x16 partition: if the left neighbour uses the same ref, copy its
 * MV directly (spec shortcut); the copy/return lines are elided. */
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Right 8x16 partition: same shortcut against the diagonal neighbour. */
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
/* No shortcut applied: fall back to the generic median prediction. */
875 pred_motion(h, n, 2, list, ref, mx, my);
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
892 pred_motion(h, 0, 4, 0, 0, mx, my);
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Precomputes dist_scale_factor[] for temporal direct prediction, plus the
 * per-field variants when MBAFF is in use.
 * NOTE(review): listing truncated — the FRAME_MBAFF conditional structure
 * and closing braces are elided; comments annotate only visible lines. */
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
/* POC of the current field/frame and of the first list1 reference. */
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
/* Field variants: indices 16+ address the field-split reference list;
 * i^field interleaves same-parity references first. */
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
/* Frame (or single-field) factors for every list0 reference. */
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Builds map_col_to_list0: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the same picture in the current list0,
 * matched via 4*frame_num + reference parity. Used by temporal direct.
 * NOTE(review): listing truncated — the field-poc adjustment branch and
 * closing braces are elided; comments annotate only visible lines. */
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
/* mbafi: search the field half of the list (entries 16..16+2*count). */
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
/* poc&3==3 marks a frame reference; retag it with the parity of the
 * field currently being matched. */
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
951 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference POCs/counts (so future pictures can
 * use it as a co-located reference) and, for B slices using temporal direct
 * prediction, builds the col->list0 maps via fill_colmap().
 * NOTE(review): variable declarations and the early-return for the
 * non-temporal case are partially elided in this excerpt.
 */
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
/* sidx selects which field slot of cur to fill; ref1sidx the co-located one */
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
/* store ref POCs encoded as 4*frame_num + field parity (see fill_colmap) */
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* progressive frames use the same tables for both field slots */
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
/* colmaps are only needed for temporal direct in B slices */
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Fills the ref_cache/mv_cache (and sub_mb_type) for a direct-predicted B
 * macroblock, using either spatial or temporal direct mode as selected by
 * h->direct_spatial_mv_pred. Reads the co-located list-1 picture's motion
 * (l1mv0/l1mv1) and reference indices (l1ref0/l1ref1), with extensive
 * special-casing for field/frame (MBAFF, PAFF) combinations.
 * @param mb_type in/out: direct-related partition flags are OR-ed in/masked.
 * NOTE(review): many interior lines (declarations of mb_type_col/ref/mv/
 * ref_offset, several else branches, continue statements, closing braces)
 * are elided in this excerpt; comments describe only the visible code.
 */
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- select the co-located macroblock, adjusting mb_xy when the current MB
 *     and the co-located picture differ in field/frame coding --- */
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
/* pick the co-located field temporally closest to the current picture */
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
/* step one MB row up/down to reach the opposite-parity field */
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
/* --- choose the direct partition size from the co-located MB type --- */
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* --- pointers into the co-located picture's motion/ref tables --- */
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* advance to the bottom half of the co-located MB pair (guard elided) */
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
/* ===================== spatial direct prediction ===================== */
1065 if(h->direct_spatial_mv_pred){
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min so negative (unavailable) values lose against valid refs */
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* no neighbor has a reference in either list: use ref 0 with zero MVs */
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1089 for(list=0; list<2; list++){
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1093 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list from the MB/sub-MB types */
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch with the co-located MB: per-8x8 handling */
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* co-located block is "stationary" (ref 0, |mv| <= 1): zero out MVs */
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1125 a= pack16to32(mv[0][0],mv[0][1]);
1127 b= pack16to32(mv[1][0],mv[1][1]);
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* whole-MB 16x16 spatial direct */
1135 }else if(IS_16X16(*mb_type)){
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* x264_build check: workaround for old x264 bitstreams (build <= 33) */
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1145 a= pack16to32(mv[0][0],mv[0][1]);
1147 b= pack16to32(mv[1][0],mv[1][1]);
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* per-8x8 spatial direct (same field/frame coding as co-located MB) */
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero the MVs of 4x4 (or whole 8x8) blocks whose co-located motion is tiny */
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ===================== temporal direct prediction ===================== */
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MBs use the per-field maps/scale factors */
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* field/frame mismatch: scale the vertical MV component via y_shift */
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1219 h->sub_mb_type[i8] = sub_mb_type;
/* temporal direct always uses list-1 ref 0 */
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra co-located block: ref 0, zero motion */
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the co-located reference into the current list 0 */
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* scale the co-located MV; L1 MV is the scaled MV minus the co-located MV */
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
/* negative l1ref0 means "use list 1"; offsets then map via list-1 table */
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* per-8x8 temporal direct, same field/frame coding as co-located MB */
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1294 ref0 = map_col_to_list0[0][ref0];
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB caches (mv_cache, ref_cache, mvd_cache, sub_mb_type)
 * back into the frame-wide tables of the current picture.
 * NOTE(review): loop headers/closings and parts of the unused-list handling
 * are elided in this excerpt.
 */
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
/* top-left of this MB in 4x4-block and 8x8-block coordinates */
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list 0 unused so direct-mode of future B frames skips it */
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1333 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two uint64 halves) per row from the cache */
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC also needs the motion vector differences */
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 partition */
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* per-8x8 direct flags, used by CABAC context modelling in B slices */
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header byte and removes the 0x000003 emulation-prevention
 * escapes from the RBSP payload; returns src+1 unchanged when no escape is
 * present, otherwise an unescaped copy in h->rbsp_buffer.
 * NOTE(review): declarations (dst, si/di, bufidx), several loop headers and
 * the fast-scan break are elided in this excerpt. */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
/* debug dump of the raw NAL bytes (compiled only under trace, presumably) */
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
/* fast scan over byte pairs looking for a 00 00 0x candidate */
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1395 /* startcode, so we must be past the end */
/* no escape found: return the input directly (zero-copy path) */
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
/* DPC (partition C) gets its own buffer so it can coexist with partition A/B */
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1410 dst= h->rbsp_buffer[bufidx];
1416 //printf("decoding esc\n");
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1426 }else //next start code
/* default: plain copy byte */
1430 dst[di++]= src[si++];
/* zero the padding so downstream bit readers can overread safely */
1433 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1436 *consumed= si + 1;//+1 for the header
1437 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1442 * identifies the exact end of the bitstream
1443 * @return the length of the trailing, or 0 if damaged
/* Locates the rbsp_stop_one_bit to find the exact end of the bitstream.
 * NOTE(review): the body (reading v, scanning for the stop bit, the return)
 * is almost entirely elided in this excerpt. */
1445 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1449 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1459 * IDCT transforms the 16 dc values and dequantizes them.
1460 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform + dequantization of the 16 luma DC
 * coefficients (Intra16x16 mode). First pass transforms columns into temp[],
 * second pass transforms rows and writes the dequantized result back at the
 * DC positions of the 16 4x4 blocks (hence the stride*0/2/8/10 pattern).
 * NOTE(review): the loop headers, stride definition, and the temp[] stores of
 * the first pass are elided in this excerpt. */
1462 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1465 int temp[16]; //FIXME check if this is a good idea
/* offsets of the four 4x4-block columns/rows inside the MB coefficient array */
1466 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1467 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1469 //memset(block, 64, 2*256);
/* pass 1: vertical butterflies */
1472 const int offset= y_offset[i];
1473 const int z0= block[offset+stride*0] + block[offset+stride*4];
1474 const int z1= block[offset+stride*0] - block[offset+stride*4];
1475 const int z2= block[offset+stride*1] - block[offset+stride*5];
1476 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: horizontal butterflies + dequant ((x*qmul + 128) >> 8) */
1485 const int offset= x_offset[i];
1486 const int z0= temp[4*0+i] + temp[4*2+i];
1487 const int z1= temp[4*0+i] - temp[4*2+i];
1488 const int z2= temp[4*1+i] - temp[4*3+i];
1489 const int z3= temp[4*1+i] + temp[4*3+i];
1491 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1492 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1493 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1494 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1500 * DCT transforms the 16 dc values.
1501 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * mirrors h264_luma_dc_dequant_idct_c but with a >>1 normalization instead of
 * dequantization.
 * NOTE(review): loop headers, stride definition and first-pass temp[] stores
 * are elided in this excerpt. */
1503 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1504 // const int qmul= dequant_coeff[qp][0];
1506 int temp[16]; //FIXME check if this is a good idea
1507 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1508 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* pass 1: vertical butterflies */
1511 const int offset= y_offset[i];
1512 const int z0= block[offset+stride*0] + block[offset+stride*4];
1513 const int z1= block[offset+stride*0] - block[offset+stride*4];
1514 const int z2= block[offset+stride*1] - block[offset+stride*5];
1515 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: horizontal butterflies, normalized by >>1 */
1524 const int offset= x_offset[i];
1525 const int z0= temp[4*0+i] + temp[4*2+i];
1526 const int z1= temp[4*0+i] - temp[4*2+i];
1527 const int z2= temp[4*1+i] - temp[4*3+i];
1528 const int z3= temp[4*1+i] + temp[4*3+i];
1530 block[stride*0 +offset]= (z0 + z3)>>1;
1531 block[stride*2 +offset]= (z1 + z2)>>1;
1532 block[stride*8 +offset]= (z1 - z2)>>1;
1533 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequantization of the 4 chroma DC
 * coefficients; the DCs live at the corner positions of the four chroma 4x4
 * blocks (stride 16*2, xStride 16).
 * NOTE(review): the intermediate e/f butterfly assignments are elided in this
 * excerpt. */
1541 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1542 const int stride= 16*2;
1543 const int xStride= 16;
1546 a= block[stride*0 + xStride*0];
1547 b= block[stride*0 + xStride*1];
1548 c= block[stride*1 + xStride*0];
1549 d= block[stride*1 + xStride*1];
/* write back the 2x2 butterfly results, dequantized by (x*qmul)>>7 */
1556 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1557 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1558 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1559 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side);
 * same layout as chroma_dc_dequant_idct_c, without scaling.
 * NOTE(review): the intermediate e/f butterfly assignments are elided in this
 * excerpt. */
1563 static void chroma_dc_dct_c(DCTELEM *block){
1564 const int stride= 16*2;
1565 const int xStride= 16;
1568 a= block[stride*0 + xStride*0];
1569 b= block[stride*0 + xStride*1];
1570 c= block[stride*1 + xStride*0];
1571 d= block[stride*1 + xStride*1];
1578 block[stride*0 + xStride*0]= (a+c);
1579 block[stride*0 + xStride*1]= (e+b);
1580 block[stride*1 + xStride*0]= (a-c);
1581 block[stride*1 + xStride*1]= (e-b);
1586 * gets the chroma qp.
/* Looks up the chroma QP for luma qscale in the PPS-derived table;
 * t selects the Cb/Cr chroma_qp_index_offset variant. */
1588 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1589 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition in one direction (one list): fetches the
 * quarter-pel luma and eighth-pel chroma prediction from pic into
 * dest_y/dest_cb/dest_cr, spilling to ff_emulated_edge_mc() when the
 * reference block extends beyond the picture edges.
 * @param square nonzero when the luma op covers the partition in one call;
 *               otherwise a second call at +delta covers the other half
 * @param qpix_op / chroma_op  put or avg operators, chosen by the caller
 * NOTE(review): several guard lines (emu flag computation/resets, the MBAFF
 * chroma branch header) are elided in this excerpt.
 */
1592 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1593 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1594 int src_x_offset, int src_y_offset,
1595 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1596 MpegEncContext * const s = &h->s;
/* absolute quarter-pel position of the block in the reference picture */
1597 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1598 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* fractional part selects one of the 16 qpel interpolation functions */
1599 const int luma_xy= (mx&3) + ((my&3)<<2);
1600 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1601 uint8_t * src_cb, * src_cr;
1602 int extra_width= h->emu_edge_width;
1603 int extra_height= h->emu_edge_height;
1605 const int full_mx= mx>>2;
1606 const int full_my= my>>2;
1607 const int pic_width = 16*s->mb_width;
1608 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* subpel interpolation reads 3 extra pixels on the filter side */
1610 if(mx&7) extra_width -= 3;
1611 if(my&7) extra_height -= 3;
/* out-of-picture reference: build a padded copy in the edge buffer */
1613 if( full_mx < 0-extra_width
1614 || full_my < 0-extra_height
1615 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1616 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1617 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1618 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1622 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partitions need a second op for the other half */
1624 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1627 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1630 // chroma offset when predicting from a field of opposite parity
1631 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1632 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1634 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1635 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1638 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1639 src_cb= s->edge_emu_buffer;
1641 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1644 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1645 src_cr= s->edge_emu_buffer;
1647 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition: a "put" pass for the first
 * used list, then (when both lists are used) an "avg" pass that averages the
 * second prediction into the destination.
 * NOTE(review): the list0/list1 if-headers and the qpix_op reassignment to
 * the avg operators are elided in this excerpt.
 */
1650 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1651 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1652 int x_offset, int y_offset,
1653 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1654 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1655 int list0, int list1){
1656 MpegEncContext * const s = &h->s;
/* first pass always writes ("put") */
1657 qpel_mc_func *qpix_op= qpix_put;
1658 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition, then make offsets picture-absolute */
1660 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1661 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1662 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1663 x_offset += 8*s->mb_x;
1664 y_offset += 8*(s->mb_y >> MB_FIELD);
1667 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1668 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1669 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1670 qpix_op, chroma_op);
/* second pass (if any) averages into the already-written prediction */
1673 chroma_op= chroma_avg;
1677 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition. For bi-prediction, both
 * predictions are fetched (the second into the obmc scratchpad) and combined
 * with either implicit weights (use_weight==2) or explicit biweights; for
 * uni-prediction a single prediction is fetched and weighted in place.
 * NOTE(review): the list0&&list1 branch header and the trailing else are
 * elided in this excerpt.
 */
1684 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1685 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1686 int x_offset, int y_offset,
1687 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1688 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1689 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1690 int list0, int list1){
1691 MpegEncContext * const s = &h->s;
/* advance destinations to this partition, then make offsets picture-absolute */
1693 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1694 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1695 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1696 x_offset += 8*s->mb_x;
1697 y_offset += 8*(s->mb_y >> MB_FIELD);
1700 /* don't optimize for luma-only case, since B-frames usually
1701 * use implicit weights => chroma too. */
1702 uint8_t *tmp_cb = s->obmc_scratchpad;
1703 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1704 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1705 int refn0 = h->ref_cache[0][ scan8[n] ];
1706 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 directly into dest, list 1 into the scratchpad for blending */
1708 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1709 dest_y, dest_cb, dest_cr,
1710 x_offset, y_offset, qpix_put, chroma_put);
1711 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1712 tmp_y, tmp_cb, tmp_cr,
1713 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64, denominator 5 */
1715 if(h->use_weight == 2){
1716 int weight0 = h->implicit_weight[refn0][refn1];
1717 int weight1 = 64 - weight0;
1718 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1719 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit biweights/offsets signalled in the slice header */
1722 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1723 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1724 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1725 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1726 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1727 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1728 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1729 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1730 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional weighted prediction */
1733 int list = list1 ? 1 : 0;
1734 int refn = h->ref_cache[list][ scan8[n] ];
1735 Picture *ref= &h->ref_list[list][refn];
1736 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1737 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1738 qpix_put, chroma_put);
1740 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1742 if(h->use_weight_chroma){
1743 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1745 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches one partition to the weighted or the standard motion
 * compensation path. The weighted path is taken for explicit weighting
 * (use_weight==1), or for implicit weighting when the weight pair actually
 * differs from a plain 32/32 average. */
1751 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1752 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753 int x_offset, int y_offset,
1754 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1757 int list0, int list1){
1758 if((h->use_weight==2 && list0 && list1
1759 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1760 || h->use_weight==1)
1761 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1762 x_offset, y_offset, qpix_put, chroma_put,
1763 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1765 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1766 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Prefetches reference pixels for the (estimated) motion of upcoming
 * macroblocks to warm the cache ahead of actual motion compensation.
 * NOTE(review): the refn validity guard around the body is elided in this
 * excerpt. */
1769 static inline void prefetch_motion(H264Context *h, int list){
1770 /* fetch pixels for estimated mv 4 macroblocks ahead
1771 * optimized for 64byte cache lines */
1772 MpegEncContext * const s = &h->s;
1773 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel position predicted from the current MB's first MV */
1775 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1776 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1777 uint8_t **src= h->ref_list[list][refn].data;
1778 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1779 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma: half resolution; Cb/Cr assumed contiguous (src[2]-src[1] stride) */
1780 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1781 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock, dispatching by
 * partition shape (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) to
 * mc_part() with the matching qpel/chroma operators and weight functions.
 * Prefetches list-0 references before and list-1 references after the work.
 * NOTE(review): the 8x8 loop header and some closings are elided in this
 * excerpt.
 */
1785 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1786 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1787 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1788 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1789 MpegEncContext * const s = &h->s;
1790 const int mb_xy= h->mb_xy;
1791 const int mb_type= s->current_picture.mb_type[mb_xy];
1793 assert(IS_INTER(mb_type));
1795 prefetch_motion(h, 0);
1797 if(IS_16X16(mb_type)){
1798 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1799 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1800 &weight_op[0], &weight_avg[0],
1801 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 halves: partition index n = 0 (top) and 8 (bottom) */
1802 }else if(IS_16X8(mb_type)){
1803 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1804 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1805 &weight_op[1], &weight_avg[1],
1806 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1807 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1808 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1809 &weight_op[1], &weight_avg[1],
1810 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 halves: left (n=0) and right (n=4) */
1811 }else if(IS_8X16(mb_type)){
1812 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1813 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1814 &weight_op[2], &weight_avg[2],
1815 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1816 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1817 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1818 &weight_op[2], &weight_avg[2],
1819 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 mode: each 8x8 block has its own sub-partitioning */
1823 assert(IS_8X8(mb_type));
1826 const int sub_mb_type= h->sub_mb_type[i];
1828 int x_offset= (i&1)<<2;
1829 int y_offset= (i&2)<<1;
1831 if(IS_SUB_8X8(sub_mb_type)){
1832 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1834 &weight_op[3], &weight_avg[3],
1835 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1836 }else if(IS_SUB_8X4(sub_mb_type)){
1837 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1838 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1839 &weight_op[4], &weight_avg[4],
1840 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1841 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1842 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1843 &weight_op[4], &weight_avg[4],
1844 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1845 }else if(IS_SUB_4X8(sub_mb_type)){
1846 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1847 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1848 &weight_op[5], &weight_avg[5],
1849 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1850 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1851 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1852 &weight_op[5], &weight_avg[5],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856 assert(IS_SUB_4X4(sub_mb_type));
1858 int sub_x_offset= x_offset + 2*(j&1);
1859 int sub_y_offset= y_offset + (j&2);
1860 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[6], &weight_avg[6],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1869 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) into the preallocated static
 * VLC_TYPE arrays declared at the top of this file, using
 * INIT_VLC_USE_NEW_STATIC so no dynamic allocation occurs.
 * NOTE(review): the "done" guard body, loop headers and offset declaration
 * are elided in this excerpt.
 */
1872 static av_cold void decode_init_vlc(void){
1873 static int done = 0;
/* chroma DC coeff_token: fixed 256-entry table */
1880 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1881 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1882 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1883 &chroma_dc_coeff_token_len [0], 1, 1,
1884 &chroma_dc_coeff_token_bits[0], 1, 1,
1885 INIT_VLC_USE_NEW_STATIC);
/* four luma coeff_token tables, packed back to back at increasing offsets */
1889 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1890 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1891 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1892 &coeff_token_len [i][0], 1, 1,
1893 &coeff_token_bits[i][0], 1, 1,
1894 INIT_VLC_USE_NEW_STATIC);
1895 offset += coeff_token_vlc_tables_size[i];
1898 * This is a one time safety check to make sure that
1899 * the packed static coeff_token_vlc table sizes
1900 * were initialized correctly.
1902 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* three chroma-DC total_zeros tables (one per nonzero-coeff count) */
1905 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1906 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1907 init_vlc(&chroma_dc_total_zeros_vlc[i],
1908 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1909 &chroma_dc_total_zeros_len [i][0], 1, 1,
1910 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1911 INIT_VLC_USE_NEW_STATIC);
/* fifteen luma total_zeros tables */
1913 for(i=0; i<15; i++){
1914 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1915 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1916 init_vlc(&total_zeros_vlc[i],
1917 TOTAL_ZEROS_VLC_BITS, 16,
1918 &total_zeros_len [i][0], 1, 1,
1919 &total_zeros_bits[i][0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
/* run_before tables for zeros-left 1..6, plus the shared table for >6 */
1924 run_vlc[i].table = run_vlc_tables[i];
1925 run_vlc[i].table_allocated = run_vlc_tables_size;
1926 init_vlc(&run_vlc[i],
1928 &run_len [i][0], 1, 1,
1929 &run_bits[i][0], 1, 1,
1930 INIT_VLC_USE_NEW_STATIC);
1932 run7_vlc.table = run7_vlc_table,
1933 run7_vlc.table_allocated = run7_vlc_table_size;
1934 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1935 &run_len [6][0], 1, 1,
1936 &run_bits[6][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
/**
 * Free all per-context decode tables allocated in alloc_tables()/context_init().
 * Per-thread buffers (top_borders, obmc_scratchpad) are freed for every
 * thread context; av_freep() NULLs each pointer so a double call is safe.
 */
1941 static void free_tables(H264Context *h){
1944     av_freep(&h->intra4x4_pred_mode);
1945     av_freep(&h->chroma_pred_mode_table);
1946     av_freep(&h->cbp_table);
1947     av_freep(&h->mvd_table[0]);
1948     av_freep(&h->mvd_table[1]);
1949     av_freep(&h->direct_table);
1950     av_freep(&h->non_zero_count);
1951     av_freep(&h->slice_table_base);
         /* slice_table is an offset view into slice_table_base — clear it too */
1952     h->slice_table= NULL;
1954     av_freep(&h->mb2b_xy);
1955     av_freep(&h->mb2b8_xy);
         /* per-thread buffers live on the individual thread contexts */
1957     for(i = 0; i < h->s.avctx->thread_count; i++) {
1958         hx = h->thread_context[i];
1960         av_freep(&hx->top_borders[1]);
1961         av_freep(&hx->top_borders[0]);
1962         av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables (one entry per qp 0..51) from
 * the PPS scaling matrices. If both 8x8 scaling matrices are identical,
 * table 1 aliases table 0 instead of being recomputed.
 */
1966 static void init_dequant8_coeff_table(H264Context *h){
         /* the C idct8 expects a different coefficient order than the asm ones */
1968     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1969     h->dequant8_coeff[0] = h->dequant8_buffer[0];
1970     h->dequant8_coeff[1] = h->dequant8_buffer[1];
1972     for(i=0; i<2; i++ ){
             /* share table 0 when intra/inter 8x8 matrices are identical */
1973         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1974             h->dequant8_coeff[1] = h->dequant8_buffer[0];
1978         for(q=0; q<52; q++){
1979             int shift = div6[q];
                 /* store coefficients transposed when the idct implementation needs it */
1982                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1983                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1984                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the six 4x4 dequantization tables (per qp 0..51) from the PPS
 * scaling matrices; tables with identical scaling matrices alias an earlier
 * buffer instead of being recomputed.
 */
1989 static void init_dequant4_coeff_table(H264Context *h){
         /* the C idct expects a different coefficient order than the asm ones */
1991     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1992     for(i=0; i<6; i++ ){
1993         h->dequant4_coeff[i] = h->dequant4_buffer[i];
             /* reuse an earlier table when the scaling matrices match */
1995             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1996                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2003         for(q=0; q<52; q++){
2004             int shift = div6[q] + 2;
                 /* optionally transposed store, matching the idct's expected layout */
2007                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2008                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2009                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass the qp==0 entries are forced to the identity scale
 * (1<<6 cancels the >>6 in the idct).
 */
2014 static void init_dequant_tables(H264Context *h){
2016     init_dequant4_coeff_table(h);
2017     if(h->pps.transform_8x8_mode)
2018         init_dequant8_coeff_table(h);
2019     if(h->sps.transform_bypass){
                 /* identity dequant so bypass blocks are passed through unscaled */
2022                 h->dequant4_coeff[i][0][x] = 1<<6;
2023         if(h->pps.transform_8x8_mode)
2026                 h->dequant8_coeff[i][0][x] = 1<<6;
2033 * needs width/height
/**
 * Allocate all per-picture-size decode tables; needs mb_width/mb_height.
 * Uses CHECKED_ALLOCZ, which presumably jumps to a cleanup path on OOM
 * (the fail label is elided in this excerpt — confirm in full file).
 * @return 0 on success, negative on allocation failure
 */
2035 static int alloc_tables(H264Context *h){
2036     MpegEncContext * const s = &h->s;
         /* one extra mb row so edge macroblocks can index neighbours safely */
2037     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2040     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2042     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2043     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2044     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2046     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2047     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2048     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2049     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
         /* -1 marks "no slice"; slice_table is offset so index -1 rows are valid */
2051     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2052     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
         /* macroblock-index -> motion-vector-block-index lookup tables */
2054     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2055     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2056     for(y=0; y<s->mb_height; y++){
2057         for(x=0; x<s->mb_width; x++){
2058             const int mb_xy= x + y*s->mb_stride;
2059             const int b_xy = 4*x + 4*y*h->b_stride;
2060             const int b8_xy= 2*x + 2*y*h->b8_stride;
2062             h->mb2b_xy [mb_xy]= b_xy;
2063             h->mb2b8_xy[mb_xy]= b8_xy;
         /* allocated lazily in frame_start() because linesize is unknown here */
2067     s->obmc_scratchpad = NULL;
2069     if(!h->dequant4_coeff[0])
2070         init_dequant_tables(h);
2079 * Mimic alloc_tables(), but for every context thread.
/**
 * Mimic alloc_tables() for a slice-thread context: share the big read/write
 * tables of src by pointer instead of reallocating them. Only
 * obmc_scratchpad stays per-thread (allocated later in frame_start()).
 */
2081 static void clone_tables(H264Context *dst, H264Context *src){
2082     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2083     dst->non_zero_count           = src->non_zero_count;
2084     dst->slice_table              = src->slice_table;
2085     dst->cbp_table                = src->cbp_table;
2086     dst->mb2b_xy                  = src->mb2b_xy;
2087     dst->mb2b8_xy                 = src->mb2b8_xy;
2088     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2089     dst->mvd_table[0]             = src->mvd_table[0];
2090     dst->mvd_table[1]             = src->mvd_table[1];
2091     dst->direct_table             = src->direct_table;
2093     dst->s.obmc_scratchpad = NULL;
2094     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2099 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate buffers which are not shared amongst multiple threads:
 * one row of saved top-border samples (16 luma + 8+8 chroma per mb)
 * for each of the two fields/passes.
 * @return 0 on success, -1 on allocation failure (free_tables cleans up)
 */
2101 static int context_init(H264Context *h){
2102     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2103     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2107     return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by decoder (and presumably encoder —
 * confirm in full file): copy dimensions from the AVCodecContext, set up
 * prediction functions and dsputil, and default the scaling matrices to
 * flat (all 16) until a PPS overrides them.
 */
2110 static av_cold void common_init(H264Context *h){
2111     MpegEncContext * const s = &h->s;
2113     s->width = s->avctx->width;
2114     s->height = s->avctx->height;
2115     s->codec_id= s->avctx->codec->id;
2117     ff_h264_pred_init(&h->hpc, s->codec_id);
         /* -1 = no PPS-based dequant tables computed yet */
2119     h->dequant_coeff_pps= -1;
2120     s->unrestricted_mv=1;
2121     s->decode=1; //FIXME
2123     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
         /* flat default scaling matrices (value 16 == unity scale) */
2125     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2126     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format, and handle avcC-style ("length-prefixed NAL")
 * extradata whose first byte is 1.
 */
2129 static av_cold int decode_init(AVCodecContext *avctx){
2130     H264Context *h= avctx->priv_data;
2131     MpegEncContext * const s = &h->s;
2133     MPV_decode_defaults(s);
2138     s->out_format = FMT_H264;
2139     s->workaround_bugs= avctx->workaround_bugs;
2142 //    s->decode_mb= ff_h263_decode_mb;
2143     s->quarter_sample = 1;
         /* SVQ3 uses full-range (JPEG) chroma, plain H.264 uses limited range */
2146     if(avctx->codec_id == CODEC_ID_SVQ3)
2147         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2149         avctx->pix_fmt= PIX_FMT_YUV420P;
         /* first extradata byte == 1 signals avcC (MP4-style) extradata */
2153     if(avctx->extradata_size > 0 && avctx->extradata &&
2154        *(char *)avctx->extradata == 1){
2161     h->thread_context[0] = h;
         /* sentinels so the first real POC always compares as "newer" */
2162     h->outputed_poc = INT_MIN;
2163     h->prev_poc_msb= 1<<16;
/**
 * Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset tables for the frame/field geometry, and lazily allocate
 * per-thread scratchpads (linesize is only known here, not in alloc_tables).
 * @return 0 on success, negative if MPV_frame_start fails
 */
2167 static int frame_start(H264Context *h){
2168     MpegEncContext * const s = &h->s;
2171     if(MPV_frame_start(s, s->avctx) < 0)
2173     ff_er_frame_start(s);
     /*
2175      * MPV_frame_start uses pict_type to derive key_frame.
2176      * This is incorrect for H.264; IDR markings must be used.
2177      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2178      * See decode_nal_units().
     */
2180     s->current_picture_ptr->key_frame= 0;
2182     assert(s->linesize && s->uvlinesize);
         /* block_offset[0..23]: frame geometry; [24..47]: doubled-stride (field) geometry */
2184     for(i=0; i<16; i++){
2185         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2186         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2189         h->block_offset[16+i]=
2190         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2191         h->block_offset[24+16+i]=
2192         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     /* can't be in alloc_tables because linesize isn't known there.
2196      * FIXME: redo bipred weight to not require extra buffer? */
2197     for(i = 0; i < s->avctx->thread_count; i++)
2198         if(!h->thread_context[i]->s.obmc_scratchpad)
2199             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
     /* some macroblocks will be accessed before they're available */
2202     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2203         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2205 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
     // We mark the current picture as non-reference after allocating it, so
2208     // that if we break out due to an error it can be released automatically
2209     // in the next MPV_frame_start().
2210     // SVQ3 as well as most other codecs have only last/next/current and thus
2211     // get released even with set reference, besides SVQ3 and others do not
2212     // mark frames as reference later "naturally".
2213     if(s->codec_id != CODEC_ID_SVQ3)
2214         s->current_picture_ptr->reference= 0;
         /* field POCs filled in per-field later; INT_MAX = "not yet decoded" */
2216     s->current_picture_ptr->field_poc[0]=
2217     s->current_picture_ptr->field_poc[1]= INT_MAX;
2218     assert(s->current_picture_ptr->long_ref==0);
2223 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2224 MpegEncContext * const s = &h->s;
2233 src_cb -= uvlinesize;
2234 src_cr -= uvlinesize;
2236 if(!simple && FRAME_MBAFF){
2238 offset = MB_MBAFF ? 1 : 17;
2239 uvoffset= MB_MBAFF ? 1 : 9;
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2242 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2243 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2245 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2250 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2251 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2252 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2253 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2259 top_idx = MB_MBAFF ? 0 : 1;
2261 step= MB_MBAFF ? 2 : 1;
2264 // There are two lines saved, the line above the the top macroblock of a pair,
2265 // and the line above the bottom macroblock
2266 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2267 for(i=1; i<17 - skiplast; i++){
2268 h->left_border[offset+i*step]= src_y[15+i* linesize];
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2272 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2274 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2275 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2276 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2277 for(i=1; i<9 - skiplast; i++){
2278 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2279 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2282 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2286 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2287 MpegEncContext * const s = &h->s;
2298 if(!simple && FRAME_MBAFF){
2300 offset = MB_MBAFF ? 1 : 17;
2301 uvoffset= MB_MBAFF ? 1 : 9;
2305 top_idx = MB_MBAFF ? 0 : 1;
2307 step= MB_MBAFF ? 2 : 1;
2310 if(h->deblocking_filter == 2) {
2312 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2313 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2315 deblock_left = (s->mb_x > 0);
2316 deblock_top = (s->mb_y > !!MB_FIELD);
2319 src_y -= linesize + 1;
2320 src_cb -= uvlinesize + 1;
2321 src_cr -= uvlinesize + 1;
2323 #define XCHG(a,b,t,xchg)\
2330 for(i = !deblock_top; i<16; i++){
2331 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2333 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2338 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2339 if(s->mb_x+1 < s->mb_width){
2340 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2344 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2346 for(i = !deblock_top; i<8; i++){
2347 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2348 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2350 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2351 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2355 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * followed by residual idct+add for luma and chroma, then deblocking.
 * Inlined twice via the `simple` flag so the fast path (hl_decode_mb_simple)
 * is compiled without MBAFF/PCM/SVQ3/gray handling.
 */
2360 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2361     MpegEncContext * const s = &h->s;
2362     const int mb_x= s->mb_x;
2363     const int mb_y= s->mb_y;
2364     const int mb_xy= h->mb_xy;
2365     const int mb_type= s->current_picture.mb_type[mb_xy];
2366     uint8_t  *dest_y, *dest_cb, *dest_cr;
2367     int linesize, uvlinesize /*dct_offset*/;
2369     int *block_offset = &h->block_offset[0];
         /* lossless only when qp==0 AND the SPS enables bypass */
2370     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2371     const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2372     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2373     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
         /* destination pointers into the current picture planes */
2375     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2376     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2377     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2379     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2380     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
         /* field macroblock: double the strides and use the field block_offset set */
2382     if (!simple && MB_FIELD) {
2383         linesize   = h->mb_linesize   = s->linesize * 2;
2384         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2385         block_offset = &h->block_offset[24];
2386         if(mb_y&1){ //FIXME move out of this function?
2387             dest_y -= s->linesize*15;
2388             dest_cb-= s->uvlinesize*7;
2389             dest_cr-= s->uvlinesize*7;
             /* remap ref_cache indices to field references for MBAFF */
2393         for(list=0; list<h->list_count; list++){
2394             if(!USES_LIST(mb_type, list))
2396             if(IS_16X16(mb_type)){
2397                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2398                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2400                 for(i=0; i<16; i+=4){
2401                     int ref = h->ref_cache[list][scan8[i]];
2403                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2409         linesize   = h->mb_linesize   = s->linesize;
2410         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2411 //        dct_offset = s->linesize * 16;
         /* I_PCM: raw samples were stored in h->mb, just copy them out */
2414     if (!simple && IS_INTRA_PCM(mb_type)) {
2415         for (i=0; i<16; i++) {
2416             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2418         for (i=0; i<8; i++) {
2419             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2420             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2423         if(IS_INTRA(mb_type)){
             /* intra needs unfiltered neighbours: swap in saved borders first */
2424             if(h->deblocking_filter)
2425                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2427             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2428                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2429                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2432             if(IS_INTRA4x4(mb_type)){
2433                 if(simple || !s->encoding){
2434                     if(IS_8x8DCT(mb_type)){
                         /* 8x8 intra: predict then add residual per 8x8 block */
2435                         if(transform_bypass){
2437                             idct_add = s->dsp.add_pixels8;
2439                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2440                             idct_add    = s->dsp.h264_idct8_add;
2442                         for(i=0; i<16; i+=4){
2443                             uint8_t * const ptr= dest_y + block_offset[i];
2444                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                             /* profile 244 (High 4:4:4) lossless uses special pred+add */
2445                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2446                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2448                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2449                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2450                                                             (h->topright_samples_available<<i)&0x4000, linesize);
                                 /* DC-only blocks use the cheaper dc_add */
2452                                     if(nnz == 1 && h->mb[i*16])
2453                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2455                                         idct_add   (ptr, h->mb + i*16, linesize);
                     /* 4x4 intra */
2460                     if(transform_bypass){
2462                         idct_add = s->dsp.add_pixels4;
2464                         idct_dc_add = s->dsp.h264_idct_dc_add;
2465                         idct_add    = s->dsp.h264_idct_add;
2467                     for(i=0; i<16; i++){
2468                         uint8_t * const ptr= dest_y + block_offset[i];
2469                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2471                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2472                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                             /* directions needing top-right samples: synthesize them when unavailable */
2476                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2477                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2478                                 assert(mb_y || linesize <= block_offset[i]);
2479                                 if(!topright_avail){
                                     /* replicate the last available top sample across all 4 */
2480                                     tr= ptr[3 - linesize]*0x01010101;
2481                                     topright= (uint8_t*) &tr;
2483                                     topright= ptr + 4 - linesize;
2487                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2488                             nnz = h->non_zero_count_cache[ scan8[i] ];
2491                                     if(nnz == 1 && h->mb[i*16])
2492                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2494                                         idct_add   (ptr, h->mb + i*16, linesize);
2496                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
             /* 16x16 intra: predict the whole mb, then dequant+idct the DC plane */
2503                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2505                     if(!transform_bypass)
2506                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2508                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
             /* restore the filtered borders */
2510             if(h->deblocking_filter)
2511                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
             /* inter: motion compensation (optionally weighted) */
2513             hl_motion(h, dest_y, dest_cb, dest_cr,
2514                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2515                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2516                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
         /* luma residual (the 4x4-intra path already added its residual above) */
2520         if(!IS_INTRA4x4(mb_type)){
2522             if(IS_INTRA16x16(mb_type)){
2523                 if(transform_bypass){
2524                     if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2525                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2527                         for(i=0; i<16; i++){
2528                             if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2529                                 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2533                     s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2535             }else if(h->cbp&15){
2536                 if(transform_bypass){
2537                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2538                     idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2539                     for(i=0; i<16; i+=di){
2540                         if(h->non_zero_count_cache[ scan8[i] ]){
2541                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2545                     if(IS_8x8DCT(mb_type)){
2546                         s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2548                         s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
             /* SVQ3 luma residual path */
2553                 for(i=0; i<16; i++){
2554                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2555                         uint8_t * const ptr= dest_y + block_offset[i];
2556                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
         /* chroma residual: cbp bits 4-5 signal chroma coefficients present */
2562         if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2563             uint8_t *dest[2] = {dest_cb, dest_cr};
2564             if(transform_bypass){
2565                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2566                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2567                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2569                     idct_add = s->dsp.add_pixels4;
2570                     for(i=16; i<16+8; i++){
2571                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2572                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                     /* chroma DC uses separate dequant tables per plane qp */
2576                     chroma_dc_dequant_idct_c(h->mb + 16*16     , h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2577                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2579                         idct_add = s->dsp.h264_idct_add;
2580                         idct_dc_add = s->dsp.h264_idct_dc_add;
2581                         for(i=16; i<16+8; i++){
2582                             if(h->non_zero_count_cache[ scan8[i] ])
2583                                 idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2584                             else if(h->mb[i*16])
2585                                 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                 /* SVQ3 chroma residual path */
2588                     for(i=16; i<16+8; i++){
2589                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2590                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2591                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
         /* finally: save unfiltered borders and deblock */
2598     if(h->deblocking_filter) {
2599         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2600         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2601         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2602         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2603         if (!simple && FRAME_MBAFF) {
2604             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2606             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2612 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/**
 * Fast-path macroblock reconstruction: the always-inlined internal with
 * simple=1 compiles out MBAFF/PCM/SVQ3/gray handling.
 */
2614 static void hl_decode_mb_simple(H264Context *h){
2615     hl_decode_mb_internal(h, 1);
2619 * Process a macroblock; this handles edge cases, such as interlacing.
/**
 * Full macroblock reconstruction handling all edge cases (MBAFF, PCM,
 * SVQ3, gray decoding); av_noinline keeps the big body out of callers.
 */
2621 static void av_noinline hl_decode_mb_complex(H264Context *h){
2622     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or the complex path,
 * depending on per-stream and per-mb complexity flags.
 */
2625 static void hl_decode_mb(H264Context *h){
2626     MpegEncContext * const s = &h->s;
2627     const int mb_xy= h->mb_xy;
2628     const int mb_type= s->current_picture.mb_type[mb_xy];
         /* ENABLE_SMALL builds always take the single (complex) path */
2629     int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2631     if(ENABLE_H264_ENCODER && !s->decode)
2635         hl_decode_mb_complex(h);
2636     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view: double the
 * linesizes, offset the data pointers for the bottom field, and take the
 * POC of the selected field.
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2639 static void pic_as_field(Picture *pic, const int parity){
2641     for (i = 0; i < 4; ++i) {
2642         if (parity == PICT_BOTTOM_FIELD)
2643             pic->data[i] += pic->linesize[i];
2644         pic->reference = parity;
2645         pic->linesize[i] *= 2;
2647     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is a reference of the requested parity,
 * converting it to a field picture when parity is a single field and
 * adjusting pic_id by id_add.
 * @return non-zero when the copy was made (parity matched)
 */
2650 static int split_field_copy(Picture *dest, Picture *src,
2651                             int parity, int id_add){
2652     int match = !!(src->reference & parity);
2656         if(parity != PICT_FRAME){
2657             pic_as_field(dest, parity);
2659             dest->pic_id += id_add;
/**
 * Build part of the default reference list by interleaving same-parity and
 * opposite-parity fields from `in` (H.264 8.2.4.2.5 field interleaving).
 * pic_id becomes the long-term index (is_long) or frame_num otherwise.
 * @return number of entries written to def
 */
2666 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2670     while(i[0]<len || i[1]<len){
             /* advance each cursor to the next picture referencing the wanted parity */
2671         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2673         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2676             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2677             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2680             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2681             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pointers from src to sorted, ordered by POC relative to limit:
 * dir==0 takes POCs above limit in ascending order, dir!=0 takes POCs
 * below/at limit in descending order (selection-sort style passes).
 * @return number of entries appended
 */
2688 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2693         best_poc= dir ? INT_MIN : INT_MAX;
2695         for(i=0; i<len; i++){
2696             const int poc= src[i]->poc;
2697             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2699                 sorted[out_i]= src[i];
         /* sentinel unchanged -> no candidate found this pass, stop */
2702         if(best_poc == (dir ? INT_MIN : INT_MAX))
2704         limit= sorted[out_i++]->poc - dir;
2710 * fills the default_ref_list.
/**
 * Fill default_ref_list[0] (and [1] for B slices) per H.264 8.2.4.2:
 * B slices order short-term refs by POC distance around the current POC,
 * P slices by frame_num; long-term refs are appended in index order.
 */
2712 static int fill_default_ref_list(H264Context *h){
2713     MpegEncContext * const s = &h->s;
2716     if(h->slice_type_nos==FF_B_TYPE){
2717         Picture *sorted[32];
             /* field pictures compare against the POC of the current field */
2722             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2724             cur_poc= s->current_picture_ptr->poc;
2726         for(list= 0; list<2; list++){
                 /* list0: past refs first (desc), then future (asc); list1 mirrored */
2727             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2728             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2730             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2731             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2734             if(len < h->ref_count[list])
2735                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
         /* spec: if both lists would be identical, swap the first two entries of list1 */
2739         if(lens[0] == lens[1] && lens[1] > 1){
2740             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2742                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
         /* P/SP slices: only list0, short-term by frame_num then long-term */
2745         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2746         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2748         if(len < h->ref_count[0])
2749             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2752     for (i=0; i<h->ref_count[0]; i++) {
2753         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2755     if(h->slice_type_nos==FF_B_TYPE){
2756         for (i=0; i<h->ref_count[1]; i++) {
2757             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2764 static void print_short_term(H264Context *h);
2765 static void print_long_term(H264Context *h);
2768 * Extract structure information about the picture described by pic_num in
2769 * the current decoding context (frame or field). Note that pic_num is
2770 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2771 * @param pic_num picture number for which to extract structure information
2772 * @param structure one of PICT_XXX describing structure of picture
2774 * @return frame number (short term) or long term index of picture
2775 * described by pic_num
/* See the documentation comment above: maps a field pic_num to the frame
 * number / long-term index plus the field structure it refers to. The
 * parity flip below presumably keys off the low bit of pic_num — the
 * selecting condition is elided in this excerpt; confirm in full file. */
2777 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2778     MpegEncContext * const s = &h->s;
2780     *structure = s->picture_structure;
2783             /* opposite field */
2784             *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command locate the
 * requested short-term (by pic num) or long-term (by index) reference and
 * move it to the current position.
 * @return 0 on success, -1 on a syntax/overflow error
 */
2791 static int decode_ref_pic_list_reordering(H264Context *h){
2792     MpegEncContext * const s = &h->s;
2793     int list, index, pic_structure;
2795     print_short_term(h);
2798     for(list=0; list<h->list_count; list++){
2799         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
             /* ref_pic_list_reordering_flag */
2801         if(get_bits1(&s->gb)){
2802             int pred= h->curr_pic_num;
2804             for(index=0; ; index++){
2805                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2806                 unsigned int pic_id;
2808                 Picture *ref = NULL;
                     /* idc 3 terminates the reordering loop */
2810                 if(reordering_of_pic_nums_idc==3)
2813                 if(index >= h->ref_count[list]){
2814                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2818                 if(reordering_of_pic_nums_idc<3){
2819                     if(reordering_of_pic_nums_idc<2){
                         /* idc 0/1: short-term, pic num predicted differentially */
2820                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2823                         if(abs_diff_pic_num > h->max_pic_num){
2824                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2828                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2829                         else                                pred+= abs_diff_pic_num;
2830                         pred &= h->max_pic_num - 1;
2832                         frame_num = pic_num_extract(h, pred, &pic_structure);
                         /* search newest-first through the short-term list */
2834                         for(i= h->short_ref_count-1; i>=0; i--){
2835                             ref = h->short_ref[i];
2836                             assert(ref->reference);
2837                             assert(!ref->long_ref);
2839                                 ref->frame_num == frame_num &&
2840                                 (ref->reference & pic_structure)
                         /* idc 2: long-term reference selected by index */
2848                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2850                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2853                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2856                         ref = h->long_ref[long_idx];
2857                         assert(!(ref && !ref->reference));
2858                         if(ref && (ref->reference & pic_structure)){
2859                             ref->pic_id= pic_id;
2860                             assert(ref->long_ref);
2868                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2869                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                         /* shift entries down and insert the found ref at `index` */
2871                         for(i=index; i+1<h->ref_count[list]; i++){
2872                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2875                         for(; i > index; i--){
2876                             h->ref_list[list][i]= h->ref_list[list][i-1];
2878                         h->ref_list[list][index]= *ref;
2880                             pic_as_field(&h->ref_list[list][index], pic_structure);
2884                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
     /* fill any remaining holes so every list entry is usable */
2890     for(list=0; list<h->list_count; list++){
2891         for(index= 0; index < h->ref_count[list]; index++){
2892             if(!h->ref_list[list][index].data[0]){
2893                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2894                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF: derive the per-field reference list entries (slots 16+2*i and
 * 16+2*i+1) from each frame reference, and duplicate the corresponding
 * prediction weights for the field entries.
 */
2902 static void fill_mbaff_ref_list(H264Context *h){
2904     for(list=0; list<2; list++){ //FIXME try list_count
2905         for(i=0; i<h->ref_count[list]; i++){
2906             Picture *frame = &h->ref_list[list][i];
2907             Picture *field = &h->ref_list[list][16+2*i];
                 /* top field view of the frame */
2910                 field[0].linesize[j] <<= 1;
2911             field[0].reference = PICT_TOP_FIELD;
2912             field[0].poc= field[0].field_poc[0];
                 /* bottom field: same as top but offset by one (frame) line */
2913             field[1] = field[0];
2915                 field[1].data[j] += frame->linesize[j];
2916             field[1].reference = PICT_BOTTOM_FIELD;
2917             field[1].poc= field[1].field_poc[1];
                 /* both fields inherit the frame's explicit weights */
2919             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2920             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2922                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2923                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
         /* implicit weights duplicated along both ref axes */
2927     for(j=0; j<h->ref_count[1]; j++){
2928         for(i=0; i<h->ref_count[0]; i++)
2929             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2930         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2931         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, falling back to the default
 * (1<<log2_denom, offset 0) when a per-ref flag is unset. Sets
 * h->use_weight / h->use_weight_chroma when any non-default value appears.
 */
2935 static int pred_weight_table(H264Context *h){
2936     MpegEncContext * const s = &h->s;
2938     int luma_def, chroma_def;
2941     h->use_weight_chroma= 0;
2942     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2943     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
         /* default weight = unity at the chosen denominator */
2944     luma_def = 1<<h->luma_log2_weight_denom;
2945     chroma_def = 1<<h->chroma_log2_weight_denom;
2947     for(list=0; list<2; list++){
2948         for(i=0; i<h->ref_count[list]; i++){
2949             int luma_weight_flag, chroma_weight_flag;
2951             luma_weight_flag= get_bits1(&s->gb);
2952             if(luma_weight_flag){
2953                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2954                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2955                 if(   h->luma_weight[list][i] != luma_def
2956                    || h->luma_offset[list][i] != 0)
2959                 h->luma_weight[list][i]= luma_def;
2960                 h->luma_offset[list][i]= 0;
2964                 chroma_weight_flag= get_bits1(&s->gb);
2965                 if(chroma_weight_flag){
2968                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2969                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2970                         if(   h->chroma_weight[list][i][j] != chroma_def
2971                            || h->chroma_offset[list][i][j] != 0)
2972                             h->use_weight_chroma= 1;
2977                         h->chroma_weight[list][i][j]= chroma_def;
2978                         h->chroma_offset[list][i][j]= 0;
         /* list1 weights only exist for B slices */
2983         if(h->slice_type_nos != FF_B_TYPE) break;
2985     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute implicit bi-prediction weights from POC distances (H.264
 * 8.4.2.3.2): weight = 64 - dist_scale_factor, clamped to plain 32/32
 * averaging when td==0 or the scale factor is out of range.
 */
2989 static void implicit_weight_table(H264Context *h){
2990     MpegEncContext * const s = &h->s;
2992     int cur_poc = s->current_picture_ptr->poc;
         /* symmetric single-ref case: implicit weighting degenerates to plain average */
2994     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
2995        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2997         h->use_weight_chroma= 0;
3002     h->use_weight_chroma= 2;
3003     h->luma_log2_weight_denom= 5;
3004     h->chroma_log2_weight_denom= 5;
3006     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3007         int poc0 = h->ref_list[0][ref0].poc;
3008         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3009             int poc1 = h->ref_list[1][ref1].poc;
3010             int td = av_clip(poc1 - poc0, -128, 127);
3012                 int tb = av_clip(cur_poc - poc0, -128, 127);
3013                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3014                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
                     /* out-of-range scale -> fall back to equal weights */
3015                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3016                     h->implicit_weight[ref0][ref1] = 32;
3018                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3020                 h->implicit_weight[ref0][ref1] = 32;
3026 * Mark a picture as no longer needed for reference. The refmask
3027 * argument allows unreferencing of individual fields or the whole frame.
3028 * If the picture becomes entirely unreferenced, but is being held for
3029 * display purposes, it is marked as such.
3030 * @param refmask mask of fields to unreference; the mask is bitwise
3031 * anded with the reference marking of pic
3032 * @return non-zero if pic becomes entirely unreferenced (except possibly
3033 * for display purposes) zero if one of the fields remains in
/* NOTE(review): the tail of this function is elided in this view. */
3036 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* a field may stay referenced after masking; then nothing more to do */
3038 if (pic->reference &= refmask) {
/* picture still queued for output: keep it alive as a delayed pic */
3041 for(i = 0; h->delayed_pic[i]; i++)
3042 if(pic == h->delayed_pic[i]){
3043 pic->reference=DELAYED_PIC_REF;
3051 * instantaneous decoder refresh. Drops all long- and short-term
3051 * references and resets the frame_num prediction state, as required
3051 * at an IDR slice.
3053 static void idr(H264Context *h){
/* release all long term references */
3056 for(i=0; i<16; i++){
3057 remove_long(h, i, 0);
3059 assert(h->long_ref_count==0);
/* release all short term references */
3061 for(i=0; i<h->short_ref_count; i++){
3062 unreference_pic(h, h->short_ref[i], 0);
3063 h->short_ref[i]= NULL;
3065 h->short_ref_count=0;
3066 h->prev_frame_num= 0;
3067 h->prev_frame_num_offset= 0;
3072 /* forget old pics after a seek: drop delayed-output pictures, the
3072  * current picture and all reference state, then flush MpegEncContext */
3073 static void flush_dpb(AVCodecContext *avctx){
3074 H264Context *h= avctx->priv_data;
3076 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3077 if(h->delayed_pic[i])
3078 h->delayed_pic[i]->reference= 0;
3079 h->delayed_pic[i]= NULL;
/* INT_MIN: no picture has been output yet */
3081 h->outputed_poc= INT_MIN;
3083 if(h->s.current_picture_ptr)
3084 h->s.current_picture_ptr->reference= 0;
3085 h->s.first_field= 0;
3086 ff_mpeg_flush(avctx);
3090 * Find a Picture in the short term reference list by frame number.
3091 * @param frame_num frame number to search for
3092 * @param idx the index into h->short_ref where returned picture is found
3093 * undefined if no picture found.
3094 * @return pointer to the found picture, or NULL if no pic with the provided
3095 * frame number is found
/* NOTE(review): the tail of this function is elided in this view. */
3097 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3098 MpegEncContext * const s = &h->s;
/* linear scan; the short-term list is small (<= 16 entries) */
3101 for(i=0; i<h->short_ref_count; i++){
3102 Picture *pic= h->short_ref[i];
3103 if(s->avctx->debug&FF_DEBUG_MMCO)
3104 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3105 if(pic->frame_num == frame_num) {
3114 * Remove a picture from the short term reference list by its index in
3115 * that list. This does no checking on the provided index; it is assumed
3116 * to be valid. Other list entries are shifted down.
3117 * @param i index into h->short_ref of picture to remove.
3119 static void remove_short_at_index(H264Context *h, int i){
3120 assert(i >= 0 && i < h->short_ref_count);
3121 h->short_ref[i]= NULL;
/* close the gap by shifting the remaining entries down by one */
3122 if (--h->short_ref_count)
3123 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3128 * Remove the short-term reference with the given frame number,
3128 * unreferencing the fields selected by ref_mask.
3128 * @return the removed picture or NULL if an error occurs
/* NOTE(review): the tail of this function is elided in this view. */
3130 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3131 MpegEncContext * const s = &h->s;
3135 if(s->avctx->debug&FF_DEBUG_MMCO)
3136 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3138 pic = find_short(h, frame_num, &i);
/* only drop the list entry once no field remains referenced */
3140 if(unreference_pic(h, pic, ref_mask))
3141 remove_short_at_index(h, i);
3148 * Remove a picture from the long term reference list by its index in
3148 * that list, unreferencing the fields selected by ref_mask.
3150 * @return the removed picture or NULL if an error occurs
/* NOTE(review): the tail of this function is elided in this view. */
3152 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3155 pic= h->long_ref[i];
/* only clear the slot once no field of the pic remains referenced */
3157 if(unreference_pic(h, pic, ref_mask)){
3158 assert(h->long_ref[i]->long_ref == 1);
3159 h->long_ref[i]->long_ref= 0;
3160 h->long_ref[i]= NULL;
3161 h->long_ref_count--;
3169 * print short term list (debug helper; active only with FF_DEBUG_MMCO)
3171 static void print_short_term(H264Context *h) {
3173 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3174 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3175 for(i=0; i<h->short_ref_count; i++){
3176 Picture *pic= h->short_ref[i];
3177 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3183 * print long term list (debug helper; active only with FF_DEBUG_MMCO)
3185 static void print_long_term(H264Context *h) {
3187 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3188 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
/* the long-term list is a fixed array of 16 slots, possibly sparse */
3189 for(i = 0; i < 16; i++){
3190 Picture *pic= h->long_ref[i];
3192 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3199 * Executes the reference picture marking (memory management control operations),
3199 * H.264 spec 8.2.5. Applies each decoded MMCO to the short/long-term lists,
3199 * then handles the current picture's reference insertion and enforces the
3199 * SPS ref_frame_count limit.
/* NOTE(review): interior lines of this function are elided in this view;
 * comments describe only the visible code. */
3201 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3202 MpegEncContext * const s = &h->s;
3204 int current_ref_assigned=0;
3207 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3208 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3210 for(i=0; i<mmco_count; i++){
3211 int structure, frame_num;
3212 if(s->avctx->debug&FF_DEBUG_MMCO)
3213 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* resolve the short-term target picture up front for the two
 * opcodes that reference one */
3215 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3216 || mmco[i].opcode == MMCO_SHORT2LONG){
3217 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3218 pic = find_short(h, frame_num, &j);
3220 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3221 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3222 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3227 switch(mmco[i].opcode){
3228 case MMCO_SHORT2UNUSED:
3229 if(s->avctx->debug&FF_DEBUG_MMCO)
3230 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3231 remove_short(h, frame_num, structure ^ PICT_FRAME);
3233 case MMCO_SHORT2LONG:
/* evict any other picture occupying the target long-term slot */
3234 if (h->long_ref[mmco[i].long_arg] != pic)
3235 remove_long(h, mmco[i].long_arg, 0);
3237 remove_short_at_index(h, j);
3238 h->long_ref[ mmco[i].long_arg ]= pic;
3239 if (h->long_ref[ mmco[i].long_arg ]){
3240 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3241 h->long_ref_count++;
3244 case MMCO_LONG2UNUSED:
3245 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3246 pic = h->long_ref[j];
3248 remove_long(h, j, structure ^ PICT_FRAME);
3249 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3250 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3253 // Comment below left from previous code as it is an interesting note.
3254 /* First field in pair is in short term list or
3255 * at a different long term index.
3256 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3257 * Report the problem and keep the pair where it is,
3258 * and mark this field valid.
/* assign the current picture to a long-term slot (MMCO_LONG path) */
3261 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3262 remove_long(h, mmco[i].long_arg, 0);
3264 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3265 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3266 h->long_ref_count++;
3269 s->current_picture_ptr->reference |= s->picture_structure;
3270 current_ref_assigned=1;
3272 case MMCO_SET_MAX_LONG:
3273 assert(mmco[i].long_arg <= 16);
3274 // just remove the long term which index is greater than new max
3275 for(j = mmco[i].long_arg; j<16; j++){
3276 remove_long(h, j, 0);
/* MMCO reset: drop every reference and restart POC/frame_num at 0 */
3280 while(h->short_ref_count){
3281 remove_short(h, h->short_ref[0]->frame_num, 0);
3283 for(j = 0; j < 16; j++) {
3284 remove_long(h, j, 0);
3286 s->current_picture_ptr->poc=
3287 s->current_picture_ptr->field_poc[0]=
3288 s->current_picture_ptr->field_poc[1]=
3292 s->current_picture_ptr->frame_num= 0;
3298 if (!current_ref_assigned) {
3299 /* Second field of complementary field pair; the first field of
3300 * which is already referenced. If short referenced, it
3301 * should be first entry in short_ref. If not, it must exist
3302 * in long_ref; trying to put it on the short list here is an
3303 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3305 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3306 /* Just mark the second field valid */
3307 s->current_picture_ptr->reference = PICT_FRAME;
3308 } else if (s->current_picture_ptr->long_ref) {
3309 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3310 "assignment for second field "
3311 "in complementary field pair "
3312 "(first field is long term)\n");
/* normal case: push the current picture to the head of short_ref */
3314 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3316 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3319 if(h->short_ref_count)
3320 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3322 h->short_ref[0]= s->current_picture_ptr;
3323 h->short_ref_count++;
3324 s->current_picture_ptr->reference |= s->picture_structure;
3328 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3330 /* We have too many reference frames, probably due to corrupted
3331 * stream. Need to discard one frame. Prevents overrun of the
3332 * short_ref and long_ref buffers.
3334 av_log(h->s.avctx, AV_LOG_ERROR,
3335 "number of reference frames exceeds max (probably "
3336 "corrupt input), discarding one\n");
3338 if (h->long_ref_count && !h->short_ref_count) {
3339 for (i = 0; i < 16; ++i)
3344 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3346 pic = h->short_ref[h->short_ref_count - 1];
3347 remove_short(h, pic->frame_num, 0);
3351 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() from the slice header (H.264 spec 7.3.3.3)
 * into h->mmco[]. For non-IDR slices with adaptive marking, reads MMCO
 * opcodes until MMCO_END; otherwise synthesizes a sliding-window
 * MMCO_SHORT2UNUSED when the reference buffer is full.
 * NOTE(review): interior lines of this function are elided in this view;
 * comments describe only the visible code.
 */
3356 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3357 MpegEncContext * const s = &h->s;
3361 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; -1 marks a broken link */
3362 s->broken_link= get_bits1(gb) -1;
3364 h->mmco[0].opcode= MMCO_LONG;
3365 h->mmco[0].long_arg= 0;
3369 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3370 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3371 MMCOOpcode opcode= get_ue_golomb(gb);
3373 h->mmco[i].opcode= opcode;
3374 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1, wrapped to the pic num range */
3375 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3376 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3377 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3381 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3382 unsigned int long_arg= get_ue_golomb(gb);
/* long indices are doubled for field pictures, hence the 32 limit */
3383 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3384 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3387 h->mmco[i].long_arg= long_arg;
3390 if(opcode > (unsigned)MMCO_LONG){
3391 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3394 if(opcode == MMCO_END)
3399 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: evict the oldest short-term ref when the buffer is
 * full, except for the second field of a referenced field pair */
3401 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3402 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3403 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3404 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3406 if (FIELD_PICTURE) {
3407 h->mmco[0].short_pic_num *= 2;
3408 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3409 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three poc_type modes (H.264 spec 8.2.1) and stores the per-field and
 * frame POC values into the current Picture.
 * NOTE(review): interior lines of this function are elided in this view;
 * comments describe only the visible code.
 */
3419 static int init_poc(H264Context *h){
3420 MpegEncContext * const s = &h->s;
3421 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3423 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> advance the offset by one full period */
3425 h->frame_num_offset= h->prev_frame_num_offset;
3426 if(h->frame_num < h->prev_frame_num)
3427 h->frame_num_offset += max_frame_num;
3429 if(h->sps.poc_type==0){
3430 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* derive poc_msb from the lsb wrap direction (spec 8.2.1.1) */
3432 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3433 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3434 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3435 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3437 h->poc_msb = h->prev_poc_msb;
3438 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3440 field_poc[1] = h->poc_msb + h->poc_lsb;
3441 if(s->picture_structure == PICT_FRAME)
3442 field_poc[1] += h->delta_poc_bottom;
3443 }else if(h->sps.poc_type==1){
3444 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3447 if(h->sps.poc_cycle_length != 0)
3448 abs_frame_num = h->frame_num_offset + h->frame_num;
3452 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3455 expected_delta_per_poc_cycle = 0;
3456 for(i=0; i < h->sps.poc_cycle_length; i++)
3457 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3459 if(abs_frame_num > 0){
3460 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3461 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3463 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3464 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3465 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3469 if(h->nal_ref_idc == 0)
3470 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3472 field_poc[0] = expectedpoc + h->delta_poc[0];
3473 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3475 if(s->picture_structure == PICT_FRAME)
3476 field_poc[1] += h->delta_poc[1];
/* poc_type == 2: POC derived directly from frame_num */
3478 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of fields actually present in this picture */
3487 if(s->picture_structure != PICT_BOTTOM_FIELD)
3488 s->current_picture_ptr->field_poc[0]= field_poc[0];
3489 if(s->picture_structure != PICT_TOP_FIELD)
3490 s->current_picture_ptr->field_poc[1]= field_poc[1];
3491 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3498 * initialize scan tables: copies or permutes the zigzag/field scan
3498 * orders depending on whether the DSP context uses the C IDCT (the
3498 * SIMD IDCTs expect a permuted coefficient order), and selects the
3498 * qp==0 (lossless transform-bypass) variants.
3500 static void init_scan_tables(H264Context *h){
3501 MpegEncContext * const s = &h->s;
3503 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3504 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3505 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3507 for(i=0; i<16; i++){
/* swap the two 2-bit halves of each 4x4 scan position */
3508 #define T(x) (x>>2) | ((x<<2) & 0xF)
3509 h->zigzag_scan[i] = T(zigzag_scan[i]);
3510 h-> field_scan[i] = T( field_scan[i]);
3514 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3515 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3516 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3517 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3518 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3520 for(i=0; i<64; i++){
/* swap row/column (3-bit halves) of each 8x8 scan position */
3521 #define T(x) (x>>3) | ((x&7)<<3)
3522 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3523 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3524 h->field_scan8x8[i] = T(field_scan8x8[i]);
3525 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3529 if(h->sps.transform_bypass){ //FIXME same ugly
/* bypass mode needs the raw (unpermuted) scan order at qp==0 */
3530 h->zigzag_scan_q0 = zigzag_scan;
3531 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3532 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3533 h->field_scan_q0 = field_scan;
3534 h->field_scan8x8_q0 = field_scan8x8;
3535 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3537 h->zigzag_scan_q0 = h->zigzag_scan;
3538 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3539 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3540 h->field_scan_q0 = h->field_scan;
3541 h->field_scan8x8_q0 = h->field_scan8x8;
3542 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3547 * Replicates H264 "master" context to thread contexts so that each
3547 * per-slice thread sees the same picture pointers, reference lists
3547 * and dequant tables as the master (shallow copies; the underlying
3547 * Pictures are shared, not duplicated).
3549 static void clone_slice(H264Context *dst, H264Context *src)
3551 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3552 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3553 dst->s.current_picture = src->s.current_picture;
3554 dst->s.linesize = src->s.linesize;
3555 dst->s.uvlinesize = src->s.uvlinesize;
3556 dst->s.first_field = src->s.first_field;
3558 dst->prev_poc_msb = src->prev_poc_msb;
3559 dst->prev_poc_lsb = src->prev_poc_lsb;
3560 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3561 dst->prev_frame_num = src->prev_frame_num;
3562 dst->short_ref_count = src->short_ref_count;
3564 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3565 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3566 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3567 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3569 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3570 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3574 * decodes a slice header.
3575 * This will also call MPV_common_init() and frame_start() as needed.
3577 * @param h h264context
3578 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3580 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): many interior lines of this function are elided in this
 * view; comments below describe only the visible code. */
3582 static int decode_slice_header(H264Context *h, H264Context *h0){
3583 MpegEncContext * const s = &h->s;
3584 MpegEncContext * const s0 = &h0->s;
3585 unsigned int first_mb_in_slice;
3586 unsigned int pps_id;
3587 int num_ref_idx_active_override_flag;
3588 unsigned int slice_type, tmp, i, j;
3589 int default_ref_list_done = 0;
3590 int last_pic_structure;
/* non-reference slices may be dropped without harming other pictures */
3592 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheaper 2-tap qpel interpolation for droppable slices */
3594 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3595 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3596 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3598 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3599 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3602 first_mb_in_slice= get_ue_golomb(&s->gb);
3604 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3605 h0->current_slice = 0;
3606 if (!s0->first_field)
3607 s->current_picture_ptr= NULL;
3610 slice_type= get_ue_golomb(&s->gb);
3612 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type values >= 5 mean "fixed for the whole picture" */
3617 h->slice_type_fixed=1;
3619 h->slice_type_fixed=0;
3621 slice_type= golomb_to_pict_type[ slice_type ];
/* I slices and repeats of the previous slice type can reuse the
 * already-built default reference list */
3622 if (slice_type == FF_I_TYPE
3623 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3624 default_ref_list_done = 1;
3626 h->slice_type= slice_type;
3627 h->slice_type_nos= slice_type & 3;
3629 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3630 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3631 av_log(h->s.avctx, AV_LOG_ERROR,
3632 "B picture before any references, skipping\n");
3636 pps_id= get_ue_golomb(&s->gb);
3637 if(pps_id>=MAX_PPS_COUNT){
3638 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3641 if(!h0->pps_buffers[pps_id]) {
3642 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3645 h->pps= *h0->pps_buffers[pps_id];
3647 if(!h0->sps_buffers[h->pps.sps_id]) {
3648 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3651 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* re-derive dequant tables only when the active PPS changed */
3653 if(h == h0 && h->dequant_coeff_pps != pps_id){
3654 h->dequant_coeff_pps = pps_id;
3655 init_dequant_tables(h);
3658 s->mb_width= h->sps.mb_width;
3659 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3661 h->b_stride= s->mb_width*4;
3662 h->b8_stride= s->mb_width*2;
/* apply SPS cropping to derive the display dimensions */
3664 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3665 if(h->sps.frame_mbs_only_flag)
3666 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3668 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3670 if (s->context_initialized
3671 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3673 return -1; // width / height changed during parallelized decoding
3675 flush_dpb(s->avctx);
3678 if (!s->context_initialized) {
3680 return -1; // we can't (re-)initialize context during parallel decoding
3681 if (MPV_common_init(s) < 0)
3685 init_scan_tables(h);
/* set up one extra H264Context per slice thread */
3688 for(i = 1; i < s->avctx->thread_count; i++) {
3690 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3691 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3692 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3695 init_scan_tables(c);
3699 for(i = 0; i < s->avctx->thread_count; i++)
3700 if(context_init(h->thread_context[i]) < 0)
3703 s->avctx->width = s->width;
3704 s->avctx->height = s->height;
3705 s->avctx->sample_aspect_ratio= h->sps.sar;
3706 if(!s->avctx->sample_aspect_ratio.den)
3707 s->avctx->sample_aspect_ratio.den = 1;
3709 if(h->sps.timing_info_present_flag){
3710 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around old x264 builds writing a halved time_scale */
3711 if(h->x264_build > 0 && h->x264_build < 44)
3712 s->avctx->time_base.den *= 2;
3713 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3714 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3718 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3721 h->mb_aff_frame = 0;
3722 last_pic_structure = s0->picture_structure;
3723 if(h->sps.frame_mbs_only_flag){
3724 s->picture_structure= PICT_FRAME;
3726 if(get_bits1(&s->gb)) { //field_pic_flag
3727 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3729 s->picture_structure= PICT_FRAME;
3730 h->mb_aff_frame = h->sps.mb_aff;
3733 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3735 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating placeholder reference frames */
3736 while(h->frame_num != h->prev_frame_num &&
3737 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3738 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3740 h->prev_frame_num++;
3741 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3742 s->current_picture_ptr->frame_num= h->prev_frame_num;
3743 execute_ref_pic_marking(h, NULL, 0);
3746 /* See if we have a decoded first field looking for a pair... */
3747 if (s0->first_field) {
3748 assert(s0->current_picture_ptr);
3749 assert(s0->current_picture_ptr->data[0]);
3750 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3752 /* figure out if we have a complementary field pair */
3753 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3755 * Previous field is unmatched. Don't display it, but let it
3756 * remain for reference if marked as such.
3758 s0->current_picture_ptr = NULL;
3759 s0->first_field = FIELD_PICTURE;
3762 if (h->nal_ref_idc &&
3763 s0->current_picture_ptr->reference &&
3764 s0->current_picture_ptr->frame_num != h->frame_num) {
3766 * This and previous field were reference, but had
3767 * different frame_nums. Consider this field first in
3768 * pair. Throw away previous field except for reference
3771 s0->first_field = 1;
3772 s0->current_picture_ptr = NULL;
3775 /* Second field in complementary pair */
3776 s0->first_field = 0;
3781 /* Frame or first field in a potentially complementary pair */
3782 assert(!s0->current_picture_ptr);
3783 s0->first_field = FIELD_PICTURE;
3786 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3787 s0->first_field = 0;
3794 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3796 assert(s->mb_num == s->mb_width * s->mb_height);
3797 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3798 first_mb_in_slice >= s->mb_num){
3799 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3802 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3803 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3804 if (s->picture_structure == PICT_BOTTOM_FIELD)
3805 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3806 assert(s->mb_y < s->mb_height);
/* pic num range doubles for field pictures (spec 8.2.4.1) */
3808 if(s->picture_structure==PICT_FRAME){
3809 h->curr_pic_num= h->frame_num;
3810 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3812 h->curr_pic_num= 2*h->frame_num + 1;
3813 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3816 if(h->nal_unit_type == NAL_IDR_SLICE){
3817 get_ue_golomb(&s->gb); /* idr_pic_id */
3820 if(h->sps.poc_type==0){
3821 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3823 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3824 h->delta_poc_bottom= get_se_golomb(&s->gb);
3828 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3829 h->delta_poc[0]= get_se_golomb(&s->gb);
3831 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3832 h->delta_poc[1]= get_se_golomb(&s->gb);
3837 if(h->pps.redundant_pic_cnt_present){
3838 h->redundant_pic_count= get_ue_golomb(&s->gb);
3841 //set defaults, might be overridden a few lines later
3842 h->ref_count[0]= h->pps.ref_count[0];
3843 h->ref_count[1]= h->pps.ref_count[1];
3845 if(h->slice_type_nos != FF_I_TYPE){
3846 if(h->slice_type_nos == FF_B_TYPE){
3847 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3849 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3851 if(num_ref_idx_active_override_flag){
3852 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3853 if(h->slice_type_nos==FF_B_TYPE)
3854 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned trick: also catches ref_count == 0 after the -1 */
3856 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3857 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3858 h->ref_count[0]= h->ref_count[1]= 1;
3862 if(h->slice_type_nos == FF_B_TYPE)
3869 if(!default_ref_list_done){
3870 fill_default_ref_list(h);
3873 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3876 if(h->slice_type_nos!=FF_I_TYPE){
3877 s->last_picture_ptr= &h->ref_list[0][0];
3878 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3880 if(h->slice_type_nos==FF_B_TYPE){
3881 s->next_picture_ptr= &h->ref_list[1][0];
3882 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* choose explicit vs implicit weighted prediction per PPS settings */
3885 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3886 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3887 pred_weight_table(h);
3888 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3889 implicit_weight_table(h);
3894 decode_ref_pic_marking(h0, &s->gb);
3897 fill_mbaff_ref_list(h);
3899 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3900 direct_dist_scale_factor(h);
3901 direct_ref_list_init(h);
3903 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3904 tmp = get_ue_golomb(&s->gb);
3906 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3909 h->cabac_init_idc= tmp;
3912 h->last_qscale_diff = 0;
3913 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3915 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3919 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3920 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3921 //FIXME qscale / qp ... stuff
3922 if(h->slice_type == FF_SP_TYPE){
3923 get_bits1(&s->gb); /* sp_for_switch_flag */
3925 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3926 get_se_golomb(&s->gb); /* slice_qs_delta */
3929 h->deblocking_filter = 1;
3930 h->slice_alpha_c0_offset = 0;
3931 h->slice_beta_offset = 0;
3932 if( h->pps.deblocking_filter_parameters_present ) {
3933 tmp= get_ue_golomb(&s->gb);
3935 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3938 h->deblocking_filter= tmp;
/* bitstream idc 0 means "enabled", 1 means "disabled": swap them */
3939 if(h->deblocking_filter < 2)
3940 h->deblocking_filter^= 1; // 1<->0
3942 if( h->deblocking_filter ) {
3943 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3944 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* honor the user's skip_loop_filter discard level */
3948 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3949 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3950 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3951 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3952 h->deblocking_filter= 0;
3954 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3955 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3956 /* Cheat slightly for speed:
3957 Do not bother to deblock across slices. */
3958 h->deblocking_filter = 2;
3960 h0->max_contexts = 1;
3961 if(!h0->single_decode_warning) {
3962 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3963 h0->single_decode_warning = 1;
3966 return 1; // deblocking switched inside frame
3971 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3972 slice_group_change_cycle= get_bits(&s->gb, ?);
3975 h0->last_slice_type = slice_type;
3976 h->slice_num = ++h0->current_slice;
3977 if(h->slice_num >= MAX_SLICES){
3978 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* build the per-slice ref-index -> frame-id mapping used by deblocking */
3982 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3986 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3987 +(h->ref_list[j][i].reference&3);
3990 for(i=16; i<48; i++)
3991 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3992 +(h->ref_list[j][i].reference&3);
3995 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3996 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3998 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3999 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4001 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4003 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4004 pps_id, h->frame_num,
4005 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4006 h->ref_count[0], h->ref_count[1],
4008 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4010 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4011 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before
 * the first 1 bit in the bitstream (H.264 spec 9.2.2.1).
 * NOTE(review): interior lines of this function are elided in this view.
 */
4021 static inline int get_level_prefix(GetBitContext *gb){
4025 OPEN_READER(re, gb);
4026 UPDATE_CACHE(re, gb);
4027 buf=GET_CACHE(re, gb);
/* position of the leading 1 bit gives the prefix length */
4029 log= 32 - av_log2(buf);
4031 print_bin(buf>>(32-log), log);
4032 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4035 LAST_SKIP_BITS(re, gb, log);
4036 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 DCT may be used for the current macroblock:
 * disallowed if any sub-macroblock partition is smaller than 8x8, or if
 * a direct sub-partition is used without direct_8x8_inference_flag.
 * NOTE(review): the body of this function is truncated in this view.
 */
4041 static inline int get_dct8x8_allowed(H264Context *h){
4044 if(!IS_SUB_8X8(h->sub_mb_type[i])
4045 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4052 * decodes a residual block.
4053 * @param n block index
4054 * @param scantable scantable
4055 * @param max_coeff number of coefficients in the block
4056 * @return <0 if an error occurred
4058 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4059 MpegEncContext * const s = &h->s;
4060 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4062 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4064 //FIXME put trailing_onex into the context
4066 if(n == CHROMA_DC_BLOCK_INDEX){
4067 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4068 total_coeff= coeff_token>>2;
4070 if(n == LUMA_DC_BLOCK_INDEX){
4071 total_coeff= pred_non_zero_count(h, 0);
4072 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4073 total_coeff= coeff_token>>2;
4075 total_coeff= pred_non_zero_count(h, n);
4076 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4077 total_coeff= coeff_token>>2;
4078 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4082 //FIXME set last_non_zero?
4086 if(total_coeff > (unsigned)max_coeff) {
4087 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4091 trailing_ones= coeff_token&3;
4092 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4093 assert(total_coeff<=16);
4095 i = show_bits(gb, 3);
4096 skip_bits(gb, trailing_ones);
4097 level[0] = 1-((i&4)>>1);
4098 level[1] = 1-((i&2) );
4099 level[2] = 1-((i&1)<<1);
4101 if(trailing_ones<total_coeff) {
4102 int level_code, mask;
4103 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4104 int prefix= get_level_prefix(gb);
4106 //first coefficient has suffix_length equal to 0 or 1
4107 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4109 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4111 level_code= (prefix<<suffix_length); //part
4112 }else if(prefix==14){
4114 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4116 level_code= prefix + get_bits(gb, 4); //part
4118 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4119 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4121 level_code += (1<<(prefix-3))-4096;
4124 if(trailing_ones < 3) level_code += 2;
4129 mask= -(level_code&1);
4130 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4132 //remaining coefficients have suffix_length > 0
4133 for(i=trailing_ones+1;i<total_coeff;i++) {
4134 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4135 prefix = get_level_prefix(gb);
4137 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4139 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4141 level_code += (1<<(prefix-3))-4096;
4143 mask= -(level_code&1);
4144 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4145 if(level_code > suffix_limit[suffix_length])
4150 if(total_coeff == max_coeff)
4153 if(n == CHROMA_DC_BLOCK_INDEX)
4154 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4156 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4159 coeff_num = zeros_left + total_coeff - 1;
4160 j = scantable[coeff_num];
4162 block[j] = level[0];
4163 for(i=1;i<total_coeff;i++) {
4166 else if(zeros_left < 7){
4167 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4169 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4171 zeros_left -= run_before;
4172 coeff_num -= 1 + run_before;
4173 j= scantable[ coeff_num ];
4178 block[j] = (level[0] * qmul[j] + 32)>>6;
4179 for(i=1;i<total_coeff;i++) {
4182 else if(zeros_left < 7){
4183 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4185 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4187 zeros_left -= run_before;
4188 coeff_num -= 1 + run_before;
4189 j= scantable[ coeff_num ];
4191 block[j]= (level[i] * qmul[j] + 32)>>6;
4196 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4203 static void predict_field_decoding_flag(H264Context *h){
4204 MpegEncContext * const s = &h->s;
4205 const int mb_xy= h->mb_xy;
4206 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4207 ? s->current_picture.mb_type[mb_xy-1]
4208 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4209 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4211 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4215 * decodes a P_SKIP or B_SKIP macroblock
4217 static void decode_mb_skip(H264Context *h){
4218 MpegEncContext * const s = &h->s;
4219 const int mb_xy= h->mb_xy;
4222 memset(h->non_zero_count[mb_xy], 0, 16);
4223 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4226 mb_type|= MB_TYPE_INTERLACED;
4228 if( h->slice_type_nos == FF_B_TYPE )
4230 // just for fill_caches. pred_direct_motion will set the real mb_type
4231 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4233 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4234 pred_direct_motion(h, &mb_type);
4235 mb_type|= MB_TYPE_SKIP;
4240 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4242 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4243 pred_pskip_motion(h, &mx, &my);
4244 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4245 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4248 write_back_motion(h, mb_type);
4249 s->current_picture.mb_type[mb_xy]= mb_type;
4250 s->current_picture.qscale_table[mb_xy]= s->qscale;
4251 h->slice_table[ mb_xy ]= h->slice_num;
4252 h->prev_mb_skipped= 1;
4256 * decodes a macroblock
4257 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4259 static int decode_mb_cavlc(H264Context *h){
4260 MpegEncContext * const s = &h->s;
4262 int partition_count;
4263 unsigned int mb_type, cbp;
4264 int dct8x8_allowed= h->pps.transform_8x8_mode;
4266 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4268 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4270 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4271 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4273 if(h->slice_type_nos != FF_I_TYPE){
4274 if(s->mb_skip_run==-1)
4275 s->mb_skip_run= get_ue_golomb(&s->gb);
4277 if (s->mb_skip_run--) {
4278 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4279 if(s->mb_skip_run==0)
4280 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4282 predict_field_decoding_flag(h);
4289 if( (s->mb_y&1) == 0 )
4290 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4293 h->prev_mb_skipped= 0;
4295 mb_type= get_ue_golomb(&s->gb);
4296 if(h->slice_type_nos == FF_B_TYPE){
4298 partition_count= b_mb_type_info[mb_type].partition_count;
4299 mb_type= b_mb_type_info[mb_type].type;
4302 goto decode_intra_mb;
4304 }else if(h->slice_type_nos == FF_P_TYPE){
4306 partition_count= p_mb_type_info[mb_type].partition_count;
4307 mb_type= p_mb_type_info[mb_type].type;
4310 goto decode_intra_mb;
4313 assert(h->slice_type_nos == FF_I_TYPE);
4314 if(h->slice_type == FF_SI_TYPE && mb_type)
4318 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4322 cbp= i_mb_type_info[mb_type].cbp;
4323 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4324 mb_type= i_mb_type_info[mb_type].type;
4328 mb_type |= MB_TYPE_INTERLACED;
4330 h->slice_table[ mb_xy ]= h->slice_num;
4332 if(IS_INTRA_PCM(mb_type)){
4335 // We assume these blocks are very rare so we do not optimize it.
4336 align_get_bits(&s->gb);
4338 // The pixels are stored in the same order as levels in h->mb array.
4339 for(x=0; x < (CHROMA ? 384 : 256); x++){
4340 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4343 // In deblocking, the quantizer is 0
4344 s->current_picture.qscale_table[mb_xy]= 0;
4345 // All coeffs are present
4346 memset(h->non_zero_count[mb_xy], 16, 16);
4348 s->current_picture.mb_type[mb_xy]= mb_type;
4353 h->ref_count[0] <<= 1;
4354 h->ref_count[1] <<= 1;
4357 fill_caches(h, mb_type, 0);
4360 if(IS_INTRA(mb_type)){
4362 // init_top_left_availability(h);
4363 if(IS_INTRA4x4(mb_type)){
4366 if(dct8x8_allowed && get_bits1(&s->gb)){
4367 mb_type |= MB_TYPE_8x8DCT;
4371 // fill_intra4x4_pred_table(h);
4372 for(i=0; i<16; i+=di){
4373 int mode= pred_intra_mode(h, i);
4375 if(!get_bits1(&s->gb)){
4376 const int rem_mode= get_bits(&s->gb, 3);
4377 mode = rem_mode + (rem_mode >= mode);
4381 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4383 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4385 write_back_intra_pred_mode(h);
4386 if( check_intra4x4_pred_mode(h) < 0)
4389 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4390 if(h->intra16x16_pred_mode < 0)
4394 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4397 h->chroma_pred_mode= pred_mode;
4399 }else if(partition_count==4){
4400 int i, j, sub_partition_count[4], list, ref[2][4];
4402 if(h->slice_type_nos == FF_B_TYPE){
4404 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4405 if(h->sub_mb_type[i] >=13){
4406 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4409 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4410 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4412 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4413 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4414 pred_direct_motion(h, &mb_type);
4415 h->ref_cache[0][scan8[4]] =
4416 h->ref_cache[1][scan8[4]] =
4417 h->ref_cache[0][scan8[12]] =
4418 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4421 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4423 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4424 if(h->sub_mb_type[i] >=4){
4425 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4428 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4429 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4433 for(list=0; list<h->list_count; list++){
4434 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4436 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4437 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4438 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4440 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4452 dct8x8_allowed = get_dct8x8_allowed(h);
4454 for(list=0; list<h->list_count; list++){
4456 if(IS_DIRECT(h->sub_mb_type[i])) {
4457 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4460 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4461 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4463 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4464 const int sub_mb_type= h->sub_mb_type[i];
4465 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4466 for(j=0; j<sub_partition_count[i]; j++){
4468 const int index= 4*i + block_width*j;
4469 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4470 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4471 mx += get_se_golomb(&s->gb);
4472 my += get_se_golomb(&s->gb);
4473 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4475 if(IS_SUB_8X8(sub_mb_type)){
4477 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4479 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4480 }else if(IS_SUB_8X4(sub_mb_type)){
4481 mv_cache[ 1 ][0]= mx;
4482 mv_cache[ 1 ][1]= my;
4483 }else if(IS_SUB_4X8(sub_mb_type)){
4484 mv_cache[ 8 ][0]= mx;
4485 mv_cache[ 8 ][1]= my;
4487 mv_cache[ 0 ][0]= mx;
4488 mv_cache[ 0 ][1]= my;
4491 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4497 }else if(IS_DIRECT(mb_type)){
4498 pred_direct_motion(h, &mb_type);
4499 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4501 int list, mx, my, i;
4502 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4503 if(IS_16X16(mb_type)){
4504 for(list=0; list<h->list_count; list++){
4506 if(IS_DIR(mb_type, 0, list)){
4507 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4508 if(val >= h->ref_count[list]){
4509 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4513 val= LIST_NOT_USED&0xFF;
4514 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4516 for(list=0; list<h->list_count; list++){
4518 if(IS_DIR(mb_type, 0, list)){
4519 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4520 mx += get_se_golomb(&s->gb);
4521 my += get_se_golomb(&s->gb);
4522 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4524 val= pack16to32(mx,my);
4527 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4530 else if(IS_16X8(mb_type)){
4531 for(list=0; list<h->list_count; list++){
4534 if(IS_DIR(mb_type, i, list)){
4535 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4536 if(val >= h->ref_count[list]){
4537 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4541 val= LIST_NOT_USED&0xFF;
4542 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4545 for(list=0; list<h->list_count; list++){
4548 if(IS_DIR(mb_type, i, list)){
4549 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4550 mx += get_se_golomb(&s->gb);
4551 my += get_se_golomb(&s->gb);
4552 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4554 val= pack16to32(mx,my);
4557 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4561 assert(IS_8X16(mb_type));
4562 for(list=0; list<h->list_count; list++){
4565 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4566 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4567 if(val >= h->ref_count[list]){
4568 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4572 val= LIST_NOT_USED&0xFF;
4573 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4576 for(list=0; list<h->list_count; list++){
4579 if(IS_DIR(mb_type, i, list)){
4580 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4581 mx += get_se_golomb(&s->gb);
4582 my += get_se_golomb(&s->gb);
4583 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4585 val= pack16to32(mx,my);
4588 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4594 if(IS_INTER(mb_type))
4595 write_back_motion(h, mb_type);
4597 if(!IS_INTRA16x16(mb_type)){
4598 cbp= get_ue_golomb(&s->gb);
4600 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4605 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4606 else cbp= golomb_to_inter_cbp [cbp];
4608 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4609 else cbp= golomb_to_inter_cbp_gray[cbp];
4614 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4615 if(get_bits1(&s->gb)){
4616 mb_type |= MB_TYPE_8x8DCT;
4617 h->cbp_table[mb_xy]= cbp;
4620 s->current_picture.mb_type[mb_xy]= mb_type;
4622 if(cbp || IS_INTRA16x16(mb_type)){
4623 int i8x8, i4x4, chroma_idx;
4625 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4626 const uint8_t *scan, *scan8x8, *dc_scan;
4628 // fill_non_zero_count_cache(h);
4630 if(IS_INTERLACED(mb_type)){
4631 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4632 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4633 dc_scan= luma_dc_field_scan;
4635 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4636 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4637 dc_scan= luma_dc_zigzag_scan;
4640 dquant= get_se_golomb(&s->gb);
4642 if( dquant > 25 || dquant < -26 ){
4643 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4647 s->qscale += dquant;
4648 if(((unsigned)s->qscale) > 51){
4649 if(s->qscale<0) s->qscale+= 52;
4650 else s->qscale-= 52;
4653 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4654 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4655 if(IS_INTRA16x16(mb_type)){
4656 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4657 return -1; //FIXME continue if partitioned and other return -1 too
4660 assert((cbp&15) == 0 || (cbp&15) == 15);
4663 for(i8x8=0; i8x8<4; i8x8++){
4664 for(i4x4=0; i4x4<4; i4x4++){
4665 const int index= i4x4 + 4*i8x8;
4666 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4672 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4675 for(i8x8=0; i8x8<4; i8x8++){
4676 if(cbp & (1<<i8x8)){
4677 if(IS_8x8DCT(mb_type)){
4678 DCTELEM *buf = &h->mb[64*i8x8];
4680 for(i4x4=0; i4x4<4; i4x4++){
4681 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4682 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4685 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4686 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4688 for(i4x4=0; i4x4<4; i4x4++){
4689 const int index= i4x4 + 4*i8x8;
4691 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4697 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4698 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4704 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4705 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4711 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4712 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4713 for(i4x4=0; i4x4<4; i4x4++){
4714 const int index= 16 + 4*chroma_idx + i4x4;
4715 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4721 uint8_t * const nnz= &h->non_zero_count_cache[0];
4722 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4723 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4726 uint8_t * const nnz= &h->non_zero_count_cache[0];
4727 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4728 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4729 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4731 s->current_picture.qscale_table[mb_xy]= s->qscale;
4732 write_back_non_zero_count(h);
4735 h->ref_count[0] >>= 1;
4736 h->ref_count[1] >>= 1;
4742 static int decode_cabac_field_decoding_flag(H264Context *h) {
4743 MpegEncContext * const s = &h->s;
4744 const int mb_x = s->mb_x;
4745 const int mb_y = s->mb_y & ~1;
4746 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4747 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4749 unsigned int ctx = 0;
4751 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4754 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4758 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4761 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4762 uint8_t *state= &h->cabac_state[ctx_base];
4766 MpegEncContext * const s = &h->s;
4767 const int mba_xy = h->left_mb_xy[0];
4768 const int mbb_xy = h->top_mb_xy;
4770 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4772 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4774 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4775 return 0; /* I4x4 */
4778 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4779 return 0; /* I4x4 */
4782 if( get_cabac_terminate( &h->cabac ) )
4783 return 25; /* PCM */
4785 mb_type = 1; /* I16x16 */
4786 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4787 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4788 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4789 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4790 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4794 static int decode_cabac_mb_type( H264Context *h ) {
4795 MpegEncContext * const s = &h->s;
4797 if( h->slice_type_nos == FF_I_TYPE ) {
4798 return decode_cabac_intra_mb_type(h, 3, 1);
4799 } else if( h->slice_type_nos == FF_P_TYPE ) {
4800 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4802 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4803 /* P_L0_D16x16, P_8x8 */
4804 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4806 /* P_L0_D8x16, P_L0_D16x8 */
4807 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4810 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4813 const int mba_xy = h->left_mb_xy[0];
4814 const int mbb_xy = h->top_mb_xy;
4817 assert(h->slice_type_nos == FF_B_TYPE);
4819 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4821 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4824 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4825 return 0; /* B_Direct_16x16 */
4827 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4828 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4831 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4832 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4833 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4834 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4836 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4837 else if( bits == 13 ) {
4838 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4839 } else if( bits == 14 )
4840 return 11; /* B_L1_L0_8x16 */
4841 else if( bits == 15 )
4842 return 22; /* B_8x8 */
4844 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4845 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4849 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4850 MpegEncContext * const s = &h->s;
4854 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4855 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4858 && h->slice_table[mba_xy] == h->slice_num
4859 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4860 mba_xy += s->mb_stride;
4862 mbb_xy = mb_xy - s->mb_stride;
4864 && h->slice_table[mbb_xy] == h->slice_num
4865 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4866 mbb_xy -= s->mb_stride;
4868 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4870 int mb_xy = h->mb_xy;
4872 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4875 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4877 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4880 if( h->slice_type_nos == FF_B_TYPE )
4882 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4885 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4888 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4891 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4892 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4893 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4895 if( mode >= pred_mode )
4901 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4902 const int mba_xy = h->left_mb_xy[0];
4903 const int mbb_xy = h->top_mb_xy;
4907 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4908 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4911 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4914 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4917 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4919 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4925 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4926 int cbp_b, cbp_a, ctx, cbp = 0;
4928 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4929 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4931 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4932 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4933 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4934 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4935 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4936 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4937 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4938 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4941 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4945 cbp_a = (h->left_cbp>>4)&0x03;
4946 cbp_b = (h-> top_cbp>>4)&0x03;
4949 if( cbp_a > 0 ) ctx++;
4950 if( cbp_b > 0 ) ctx += 2;
4951 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4955 if( cbp_a == 2 ) ctx++;
4956 if( cbp_b == 2 ) ctx += 2;
4957 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
4959 static int decode_cabac_mb_dqp( H264Context *h) {
4960 int ctx= h->last_qscale_diff != 0;
4963 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4966 if(val > 102) //prevent infinite loop
4971 return (val + 1)>>1 ;
4973 return -((val + 1)>>1);
4975 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4976 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4978 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4980 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4984 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4986 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4987 return 0; /* B_Direct_8x8 */
4988 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4989 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4991 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4992 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4993 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4996 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4997 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5001 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5002 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5005 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5006 int refa = h->ref_cache[list][scan8[n] - 1];
5007 int refb = h->ref_cache[list][scan8[n] - 8];
5011 if( h->slice_type_nos == FF_B_TYPE) {
5012 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5014 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5023 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5029 if(ref >= 32 /*h->ref_list[list]*/){
5036 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5037 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5038 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5039 int ctxbase = (l == 0) ? 40 : 47;
5041 int ctx = (amvd>2) + (amvd>32);
5043 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5048 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5056 while( get_cabac_bypass( &h->cabac ) ) {
5060 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5065 if( get_cabac_bypass( &h->cabac ) )
5069 return get_cabac_bypass_sign( &h->cabac, -mvd );
5072 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5078 nza = h->left_cbp&0x100;
5079 nzb = h-> top_cbp&0x100;
5081 nza = (h->left_cbp>>(6+idx))&0x01;
5082 nzb = (h-> top_cbp>>(6+idx))&0x01;
5085 assert(cat == 1 || cat == 2 || cat == 4);
5086 nza = h->non_zero_count_cache[scan8[idx] - 1];
5087 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5096 return ctx + 4 * cat;
5099 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5100 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5102 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5103 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5106 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5107 static const int significant_coeff_flag_offset[2][6] = {
5108 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5109 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5111 static const int last_coeff_flag_offset[2][6] = {
5112 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5113 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5115 static const int coeff_abs_level_m1_offset[6] = {
5116 227+0, 227+10, 227+20, 227+30, 227+39, 426
5118 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5119 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5120 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5121 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5122 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5123 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5124 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5125 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5126 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5128 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5129 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5130 * map node ctx => cabac ctx for level=1 */
5131 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5132 /* map node ctx => cabac ctx for level>1 */
5133 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5134 static const uint8_t coeff_abs_level_transition[2][8] = {
5135 /* update node ctx after decoding a level=1 */
5136 { 1, 2, 3, 3, 4, 5, 6, 7 },
5137 /* update node ctx after decoding a level>1 */
5138 { 4, 4, 4, 4, 5, 6, 7, 7 }
5144 int coeff_count = 0;
5147 uint8_t *significant_coeff_ctx_base;
5148 uint8_t *last_coeff_ctx_base;
5149 uint8_t *abs_level_m1_ctx_base;
5152 #define CABAC_ON_STACK
5154 #ifdef CABAC_ON_STACK
5157 cc.range = h->cabac.range;
5158 cc.low = h->cabac.low;
5159 cc.bytestream= h->cabac.bytestream;
5161 #define CC &h->cabac
5165 /* cat: 0-> DC 16x16 n = 0
5166 * 1-> AC 16x16 n = luma4x4idx
5167 * 2-> Luma4x4 n = luma4x4idx
5168 * 3-> DC Chroma n = iCbCr
5169 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5170 * 5-> Luma8x8 n = 4 * luma8x8idx
5173 /* read coded block flag */
5174 if( is_dc || cat != 5 ) {
5175 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5177 h->non_zero_count_cache[scan8[n]] = 0;
5179 #ifdef CABAC_ON_STACK
5180 h->cabac.range = cc.range ;
5181 h->cabac.low = cc.low ;
5182 h->cabac.bytestream= cc.bytestream;
5188 significant_coeff_ctx_base = h->cabac_state
5189 + significant_coeff_flag_offset[MB_FIELD][cat];
5190 last_coeff_ctx_base = h->cabac_state
5191 + last_coeff_flag_offset[MB_FIELD][cat];
5192 abs_level_m1_ctx_base = h->cabac_state
5193 + coeff_abs_level_m1_offset[cat];
5195 if( !is_dc && cat == 5 ) {
5196 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5197 for(last= 0; last < coefs; last++) { \
5198 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5199 if( get_cabac( CC, sig_ctx )) { \
5200 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5201 index[coeff_count++] = last; \
5202 if( get_cabac( CC, last_ctx ) ) { \
5208 if( last == max_coeff -1 ) {\
5209 index[coeff_count++] = last;\
5211 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5212 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5213 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5215 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5217 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5219 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5222 assert(coeff_count > 0);
5226 h->cbp_table[h->mb_xy] |= 0x100;
5228 h->cbp_table[h->mb_xy] |= 0x40 << n;
5231 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5233 assert( cat == 1 || cat == 2 || cat == 4 );
5234 h->non_zero_count_cache[scan8[n]] = coeff_count;
5239 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5241 int j= scantable[index[--coeff_count]];
5243 if( get_cabac( CC, ctx ) == 0 ) {
5244 node_ctx = coeff_abs_level_transition[0][node_ctx];
5246 block[j] = get_cabac_bypass_sign( CC, -1);
5248 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5252 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5253 node_ctx = coeff_abs_level_transition[1][node_ctx];
5255 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5259 if( coeff_abs >= 15 ) {
5261 while( get_cabac_bypass( CC ) ) {
5267 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5273 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5275 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5278 } while( coeff_count );
5279 #ifdef CABAC_ON_STACK
5280 h->cabac.range = cc.range ;
5281 h->cabac.low = cc.low ;
5282 h->cabac.bytestream= cc.bytestream;
5287 #ifndef CONFIG_SMALL
/**
 * Decodes a DC residual block (cat 0 = Intra16x16 luma DC, cat 3 = chroma DC)
 * by calling the shared CABAC residual decoder with is_dc hardwired to 1,
 * letting the compiler emit a DC-specialized copy (non-CONFIG_SMALL build).
 * NOTE(review): the closing brace of this wrapper is elided in this view.
 */
5288 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5289     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/**
 * Decodes a non-DC (AC / 4x4 / 8x8 luma, chroma AC) residual block by calling
 * the shared CABAC residual decoder with is_dc hardwired to 0, letting the
 * compiler emit an AC-specialized copy (non-CONFIG_SMALL build).
 * NOTE(review): the closing brace of this wrapper is elided in this view.
 */
5292 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5293     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/**
 * Front end for CABAC residual decoding: selects the DC path for the DC
 * categories (cat 0: Intra16x16 luma DC, cat 3: chroma DC) and the non-DC
 * path otherwise.
 * NOTE(review): the CONFIG_SMALL preprocessor branches — one generic call vs.
 * the two specialized wrappers — are elided in this view; the selection
 * between them is not fully visible here.
 */
5297 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5299     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5301     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5302     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Computes the mb_xy indices of the top and left neighbouring macroblocks and
 * stores them in h->top_mb_xy and h->left_mb_xy[0].  The frame-coded default
 * (raster neighbours) is adjusted when the current or neighbouring MB pairs
 * differ in field/frame coding (MBAFF) or for field pictures.
 * NOTE(review): the conditionals guarding the MBAFF path and some closing
 * braces are elided in this view.
 */
5306 static inline void compute_mb_neighbors(H264Context *h)
5308     MpegEncContext * const s = &h->s;
5309     const int mb_xy = h->mb_xy;
// frame-coded defaults: neighbours in raster order
5310     h->top_mb_xy     = mb_xy - s->mb_stride;
5311     h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: reason about MB *pairs*; pair_xy is the top MB of the current pair
5313         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5314         const int top_pair_xy      = pair_xy     - s->mb_stride;
5315         const int top_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5316         const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5317         const int curr_mb_frame_flag = !MB_FIELD;
5318         const int bottom = (s->mb_y & 1);
5320             ? !curr_mb_frame_flag // bottom macroblock
5321             : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
// mismatched field/frame coding: step one more MB row up for the top neighbour
5323             h->top_mb_xy -= s->mb_stride;
5325         if (left_mb_frame_flag != curr_mb_frame_flag) {
// left neighbour is the top MB of the left pair when coding modes differ
5326             h->left_mb_xy[0] = pair_xy - 1;
5328     } else if (FIELD_PICTURE) {
// field pictures: vertically adjacent MBs are two rows apart in the frame
5329         h->top_mb_xy -= s->mb_stride;
5335 * decodes a macroblock
5336 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// NOTE(review): many interior lines of this function (braces, else branches,
// local declarations, error returns) are elided in this view; comments below
// describe only what the visible lines establish.
5338 static int decode_mb_cabac(H264Context *h) {
5339     MpegEncContext * const s = &h->s;
5341     int mb_type, partition_count, cbp = 0;
5342     int dct8x8_allowed= h->pps.transform_8x8_mode;
5344     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5346     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5348     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// ---- skip-flag handling (non-intra slices only) --------------------------
5349     if( h->slice_type_nos != FF_I_TYPE ) {
5351         /* a skipped mb needs the aff flag from the following mb */
5352         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5353             predict_field_decoding_flag(h);
5354         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5355             skip = h->next_mb_skipped;
5357             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5358         /* read skip flags */
5360             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
// top MB of an MBAFF pair was skipped: also read the bottom MB's skip flag now
5361                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5362                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5363                 if(h->next_mb_skipped)
5364                     predict_field_decoding_flag(h);
5366                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// reset per-MB state for the skipped macroblock
5371             h->cbp_table[mb_xy] = 0;
5372             h->chroma_pred_mode_table[mb_xy] = 0;
5373             h->last_qscale_diff = 0;
5380         if( (s->mb_y&1) == 0 )
5382                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5385     h->prev_mb_skipped = 0;
// ---- macroblock type -----------------------------------------------------
5387     compute_mb_neighbors(h);
5388     mb_type = decode_cabac_mb_type( h );
5389     assert(mb_type >= 0);
5391     if( h->slice_type_nos == FF_B_TYPE ) {
// B slice: map the decoded index through the B mb-type table
5393             partition_count= b_mb_type_info[mb_type].partition_count;
5394             mb_type=         b_mb_type_info[mb_type].type;
5397             goto decode_intra_mb;
5399     } else if( h->slice_type_nos == FF_P_TYPE ) {
// P slice: map through the P mb-type table
5401             partition_count= p_mb_type_info[mb_type].partition_count;
5402             mb_type=         p_mb_type_info[mb_type].type;
5405             goto decode_intra_mb;
5408        if(h->slice_type == FF_SI_TYPE && mb_type)
5410         assert(h->slice_type_nos == FF_I_TYPE);
// intra MB: cbp and the 16x16 prediction mode come from the type table
5412         partition_count = 0;
5413         cbp= i_mb_type_info[mb_type].cbp;
5414         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5415         mb_type= i_mb_type_info[mb_type].type;
5418         mb_type |= MB_TYPE_INTERLACED;
5420     h->slice_table[ mb_xy ]= h->slice_num;
// ---- IS_INTRA_PCM: raw samples follow; re-align the CABAC decoder to a ----
// ---- byte boundary, copy the samples, then restart the CABAC decoder  ----
5422     if(IS_INTRA_PCM(mb_type)) {
5425         // We assume these blocks are very rare so we do not optimize it.
5426         // FIXME The two following lines get the bitstream position in the cabac
5427         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5428         ptr= h->cabac.bytestream;
5429         if(h->cabac.low&0x1) ptr--;
5431             if(h->cabac.low&0x1FF) ptr--;
5434         // The pixels are stored in the same order as levels in h->mb array.
5435         memcpy(h->mb, ptr, 256); ptr+=256;
5437             memcpy(h->mb+128, ptr, 128); ptr+=128;
5440         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5442         // All blocks are present
5443         h->cbp_table[mb_xy] = 0x1ef;
5444         h->chroma_pred_mode_table[mb_xy] = 0;
5445         // In deblocking, the quantizer is 0
5446         s->current_picture.qscale_table[mb_xy]= 0;
5447         // All coeffs are present
5448         memset(h->non_zero_count[mb_xy], 16, 16);
5449         s->current_picture.mb_type[mb_xy]= mb_type;
5450         h->last_qscale_diff = 0;
// ref_count is doubled here and halved again at the bottom of this function
// (presumably for the MBAFF per-field reference handling — guard elided here)
5455         h->ref_count[0] <<= 1;
5456         h->ref_count[1] <<= 1;
5459     fill_caches(h, mb_type, 0);
// ---- intra prediction modes ----------------------------------------------
5461     if( IS_INTRA( mb_type ) ) {
5463         if( IS_INTRA4x4( mb_type ) ) {
5464             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
// 8x8 transform: one decoded mode is replicated over each 2x2 group of
// 4x4 blocks in the prediction-mode cache
5465                 mb_type |= MB_TYPE_8x8DCT;
5466                 for( i = 0; i < 16; i+=4 ) {
5467                     int pred = pred_intra_mode( h, i );
5468                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5469                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5472                 for( i = 0; i < 16; i++ ) {
5473                     int pred = pred_intra_mode( h, i );
5474                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5476                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5479             write_back_intra_pred_mode(h);
5480             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5482             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5483             if( h->intra16x16_pred_mode < 0 ) return -1;
5486         h->chroma_pred_mode_table[mb_xy] =
5487         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5489         pred_mode= check_intra_pred_mode( h, pred_mode );
5490         if( pred_mode < 0 ) return -1;
5491         h->chroma_pred_mode= pred_mode;
// ---- 8x8 sub-macroblock partitions (P/B) ---------------------------------
5493     } else if( partition_count == 4 ) {
5494         int i, j, sub_partition_count[4], list, ref[2][4];
5496         if( h->slice_type_nos == FF_B_TYPE ) {
5497             for( i = 0; i < 4; i++ ) {
5498                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5499                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5500                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5502             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5503                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
// at least one direct 8x8 partition: derive its motion now
5504                 pred_direct_motion(h, &mb_type);
5505                 h->ref_cache[0][scan8[4]] =
5506                 h->ref_cache[1][scan8[4]] =
5507                 h->ref_cache[0][scan8[12]] =
5508                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5509                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5510                     for( i = 0; i < 4; i++ )
5511                         if( IS_DIRECT(h->sub_mb_type[i]) )
5512                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5516             for( i = 0; i < 4; i++ ) {
5517                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5518                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5519                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices per 8x8 partition, per list
5523         for( list = 0; list < h->list_count; list++ ) {
5524                 for( i = 0; i < 4; i++ ) {
5525                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5526                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5527                         if( h->ref_count[list] > 1 ){
5528                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5529                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5530                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5538                                        h->ref_cache[list][ scan8[4*i]+1 ]=
5539                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5544             dct8x8_allowed = get_dct8x8_allowed(h);
// motion vector differences per sub-partition, per list
5546         for(list=0; list<h->list_count; list++){
5548                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5549                 if(IS_DIRECT(h->sub_mb_type[i])){
5550                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5554                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5555                     const int sub_mb_type= h->sub_mb_type[i];
5556                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5557                     for(j=0; j<sub_partition_count[i]; j++){
5560                         const int index= 4*i + block_width*j;
5561                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5562                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5563                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5565                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5566                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5567                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate mv/mvd over the cache cells covered by the sub-partition shape
5569                         if(IS_SUB_8X8(sub_mb_type)){
5571                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5573                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5576                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5578                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5579                         }else if(IS_SUB_8X4(sub_mb_type)){
5580                             mv_cache[ 1 ][0]= mx;
5581                             mv_cache[ 1 ][1]= my;
5583                             mvd_cache[ 1 ][0]= mx - mpx;
5584                             mvd_cache[ 1 ][1]= my - mpy;
5585                         }else if(IS_SUB_4X8(sub_mb_type)){
5586                             mv_cache[ 8 ][0]= mx;
5587                             mv_cache[ 8 ][1]= my;
5589                             mvd_cache[ 8 ][0]= mx - mpx;
5590                             mvd_cache[ 8 ][1]= my - mpy;
5592                         mv_cache[ 0 ][0]= mx;
5593                         mv_cache[ 0 ][1]= my;
5595                         mvd_cache[ 0 ][0]= mx - mpx;
5596                         mvd_cache[ 0 ][1]= my - mpy;
// partition unused in this list: zero the 2x2 mv/mvd cache cells
5599                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5600                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5601                     p[0] = p[1] = p[8] = p[9] = 0;
5602                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
// ---- direct 16x16 (B slices) ---------------------------------------------
5606     } else if( IS_DIRECT(mb_type) ) {
5607         pred_direct_motion(h, &mb_type);
5608         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5609         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5610         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5612         int list, mx, my, i, mpx, mpy;
// ---- 16x16 inter ---------------------------------------------------------
5613         if(IS_16X16(mb_type)){
5614             for(list=0; list<h->list_count; list++){
5615                 if(IS_DIR(mb_type, 0, list)){
5617                     if(h->ref_count[list] > 1){
5618                         ref= decode_cabac_mb_ref(h, list, 0);
5619                         if(ref >= (unsigned)h->ref_count[list]){
5620                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5625                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5627                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5629             for(list=0; list<h->list_count; list++){
5630                 if(IS_DIR(mb_type, 0, list)){
5631                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5633                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5634                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5635                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5637                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5638                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5640                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
// ---- 16x8 inter ----------------------------------------------------------
5643         else if(IS_16X8(mb_type)){
5644             for(list=0; list<h->list_count; list++){
5646                         if(IS_DIR(mb_type, i, list)){
5648                             if(h->ref_count[list] > 1){
5649                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5650                                 if(ref >= (unsigned)h->ref_count[list]){
5651                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5656                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5658                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5661             for(list=0; list<h->list_count; list++){
5663                     if(IS_DIR(mb_type, i, list)){
5664                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5665                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5666                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5667                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5669                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5670                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5672                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5673                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
// ---- 8x16 inter ----------------------------------------------------------
5678             assert(IS_8X16(mb_type));
5679             for(list=0; list<h->list_count; list++){
5681                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5683                             if(h->ref_count[list] > 1){
5684                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5685                                 if(ref >= (unsigned)h->ref_count[list]){
5686                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5691                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5693                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5696             for(list=0; list<h->list_count; list++){
5698                     if(IS_DIR(mb_type, i, list)){
5699                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5700                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5701                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5703                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5704                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5705                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5707                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5708                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5715    if( IS_INTER( mb_type ) ) {
5716         h->chroma_pred_mode_table[mb_xy] = 0;
5717         write_back_motion( h, mb_type );
// ---- coded block pattern -------------------------------------------------
5720     if( !IS_INTRA16x16( mb_type ) ) {
5721         cbp  = decode_cabac_mb_cbp_luma( h );
5723             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5726     h->cbp_table[mb_xy] = h->cbp = cbp;
5728     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5729         if( decode_cabac_mb_transform_size( h ) )
5730             mb_type |= MB_TYPE_8x8DCT;
5732     s->current_picture.mb_type[mb_xy]= mb_type;
// ---- residual decoding ---------------------------------------------------
5734     if( cbp || IS_INTRA16x16( mb_type ) ) {
5735         const uint8_t *scan, *scan8x8, *dc_scan;
5736         const uint32_t *qmul;
// pick field or frame scan tables (_q0 variants for qscale==0, see tables)
5739         if(IS_INTERLACED(mb_type)){
5740             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5741             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5742             dc_scan= luma_dc_field_scan;
5744             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5745             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5746             dc_scan= luma_dc_zigzag_scan;
5749         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5750         if( dqp == INT_MIN ){
5751             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// wrap qscale back into the legal 0..51 range after adding dqp
5755         if(((unsigned)s->qscale) > 51){
5756             if(s->qscale<0) s->qscale+= 52;
5757             else            s->qscale-= 52;
5759         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5760         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5762         if( IS_INTRA16x16( mb_type ) ) {
// Intra16x16: one DC block (cat 0) + 16 AC blocks of 15 coeffs (cat 1)
5764             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5765             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5768                 qmul = h->dequant4_coeff[0][s->qscale];
5769                 for( i = 0; i < 16; i++ ) {
5770                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5771                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5774                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// non-Intra16x16 luma: per-8x8 either one 8x8 block (cat 5) or four 4x4 (cat 2)
5778             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5779                 if( cbp & (1<<i8x8) ) {
5780                     if( IS_8x8DCT(mb_type) ) {
5781                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5782                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5784                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5785                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5786                             const int index = 4*i8x8 + i4x4;
5787                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5789                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5790 //STOP_TIMER("decode_residual")
// 8x8 group not coded: clear its non-zero-count cache cells
5794                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5795                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC blocks (cat 3), one per component
5802             for( c = 0; c < 2; c++ ) {
5803                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5804                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
// chroma AC blocks (cat 4), four per component
5810             for( c = 0; c < 2; c++ ) {
5811                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5812                 for( i = 0; i < 4; i++ ) {
5813                     const int index = 16 + 4 * c + i;
5814                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5815                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
// chroma AC not coded: clear its non-zero-count cache cells
5819             uint8_t * const nnz= &h->non_zero_count_cache[0];
5820             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5821             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// cbp==0 and not Intra16x16: no residual at all, clear everything
5824         uint8_t * const nnz= &h->non_zero_count_cache[0];
5825         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5826         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5827         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5828         h->last_qscale_diff = 0;
5831     s->current_picture.qscale_table[mb_xy]= s->qscale;
5832     write_back_non_zero_count(h);
// undo the ref_count doubling done near the top of this function
5835         h->ref_count[0] >>= 1;
5836         h->ref_count[1] >>= 1;
/**
 * Deblocks one vertical luma edge via the DSP horizontal loop filter.
 * bS[4] holds the boundary strength per 4-pixel group; bS values below 4 use
 * the normal filter with a tc0 clipping value per group (-1 marks a group to
 * skip), while the strong/intra variant takes no tc.
 * NOTE(review): the tc[] declaration and the if/else selecting between the
 * normal and intra DSP filters are elided in this view.
 */
5843 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
// tables are biased by +52 so negative qp+offset indices stay in range
5845     const int index_a = qp + h->slice_alpha_c0_offset;
5846     const int alpha = (alpha_table+52)[index_a];
5847     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5852             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5853         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5855         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one vertical chroma edge via the DSP horizontal chroma loop
 * filter.  Like filter_mb_edgev, but chroma clipping uses tc0+1 and 0 marks
 * a group to skip.
 * NOTE(review): the tc[] declaration and the normal/intra if/else are elided
 * in this view.
 */
5858 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5860     const int index_a = qp + h->slice_alpha_c0_offset;
5861     const int alpha = (alpha_table+52)[index_a];
5862     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5867             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5868         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5870         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblocks the vertical luma edge of an MBAFF macroblock pair, one pixel row
 * at a time (16 rows).  bS[8] carries per-row boundary strengths and qp[2]
 * the two quantizers (one per field/MB of the pair); bS < 4 runs the normal
 * filter, bS == 4 the strong/intra filter.  Implemented in scalar C because
 * rows may alternate between the two parameter sets.
 * NOTE(review): several declarations (i, qp_index, index_a, alpha, beta, tc,
 * i_delta), some guards and else branches are elided in this view.
 */
5874 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5876     for( i = 0; i < 16; i++, pix += stride) {
5882             int bS_index = (i >> 1);
5885                 bS_index |= (i & 1);
5888         if( bS[bS_index] == 0 ) {
// choose which of the pair's two qp values applies to this row
5892         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5893         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5894         alpha = (alpha_table+52)[index_a];
5895         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5897         if( bS[bS_index] < 4 ) {
// normal filter: clip the delta to +-tc derived from the tc0 table
5898             const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5899             const int p0 = pix[-1];
5900             const int p1 = pix[-2];
5901             const int p2 = pix[-3];
5902             const int q0 = pix[0];
5903             const int q1 = pix[1];
5904             const int q2 = pix[2];
5906             if( FFABS( p0 - q0 ) < alpha &&
5907                 FFABS( p1 - p0 ) < beta &&
5908                 FFABS( q1 - q0 ) < beta ) {
5912                 if( FFABS( p2 - p0 ) < beta ) {
5913                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5916                 if( FFABS( q2 - q0 ) < beta ) {
5917                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5921                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5922                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
5923                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
5924                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// strong filter (bS == 4): full intra-edge smoothing
5927             const int p0 = pix[-1];
5928             const int p1 = pix[-2];
5929             const int p2 = pix[-3];
5931             const int q0 = pix[0];
5932             const int q1 = pix[1];
5933             const int q2 = pix[2];
5935             if( FFABS( p0 - q0 ) < alpha &&
5936                 FFABS( p1 - p0 ) < beta &&
5937                 FFABS( q1 - q0 ) < beta ) {
5939                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5940                     if( FFABS( p2 - p0 ) < beta)
5942                         const int p3 = pix[-4];
5944                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5945                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5946                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5949                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5951                     if( FFABS( q2 - q0 ) < beta)
5953                         const int q3 = pix[3];
5955                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5956                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5957                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5960                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// weaker smoothing when the edge activity check fails
5964                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5965                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5967                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks the vertical chroma edge of an MBAFF macroblock pair, one pixel
 * row at a time (8 rows).  Chroma filtering only touches p0/q0; bS < 4 uses
 * the tc-clipped normal filter (chroma tc = tc0 + 1), bS == 4 the strong
 * filter.
 * NOTE(review): declarations (i, bS_index, qp_index, index_a, alpha, beta)
 * and some closing braces are elided in this view.
 */
5972 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5974     for( i = 0; i < 8; i++, pix += stride) {
5982         if( bS[bS_index] == 0 ) {
// choose which of the pair's two qp values applies to this row
5986         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5987         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5988         alpha = (alpha_table+52)[index_a];
5989         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5991         if( bS[bS_index] < 4 ) {
5992             const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
5993             const int p0 = pix[-1];
5994             const int p1 = pix[-2];
5995             const int q0 = pix[0];
5996             const int q1 = pix[1];
5998             if( FFABS( p0 - q0 ) < alpha &&
5999                 FFABS( p1 - p0 ) < beta &&
6000                 FFABS( q1 - q0 ) < beta ) {
6001                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6003                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6004                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6005                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// strong filter (bS == 4)
6008             const int p0 = pix[-1];
6009             const int p1 = pix[-2];
6010             const int q0 = pix[0];
6011             const int q1 = pix[1];
6013             if( FFABS( p0 - q0 ) < alpha &&
6014                 FFABS( p1 - p0 ) < beta &&
6015                 FFABS( q1 - q0 ) < beta ) {
6017                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6018                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6019                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/**
 * Deblocks one horizontal luma edge via the DSP vertical loop filter
 * (counterpart of filter_mb_edgev for the other direction).
 * NOTE(review): the tc[] declaration and the normal/intra if/else are elided
 * in this view; pix_next is declared but its use is not visible here.
 */
6025 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6027     const int index_a = qp + h->slice_alpha_c0_offset;
6028     const int alpha = (alpha_table+52)[index_a];
6029     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6030     const int pix_next  = stride;
6035             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6036         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6038         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/**
 * Deblocks one horizontal chroma edge via the DSP vertical chroma loop
 * filter (counterpart of filter_mb_edgecv; chroma tc = tc0 + 1, 0 marks a
 * group to skip).
 * NOTE(review): the tc[] declaration and the normal/intra if/else are elided
 * in this view.
 */
6042 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6044     const int index_a = qp + h->slice_alpha_c0_offset;
6045     const int alpha = (alpha_table+52)[index_a];
6046     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6051             tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6052         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6054         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking of one macroblock.  Falls back to the full filter_mb()
 * for cases it cannot handle (first row/column, per-MB chroma qp offsets,
 * slice-boundary restrictions, or when CODEC_FLAG2_FAST is not set), skips
 * filtering entirely when all averaged quantizers are below the threshold,
 * and otherwise derives boundary strengths either from fixed intra patterns
 * or via the DSP h264_loop_filter_strength helper.
 * NOTE(review): declarations (mb_xy, mb_type, edges), several else branches
 * and the code following the FILTER macro are elided in this view.
 */
6058 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6059     MpegEncContext * const s = &h->s;
6060     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6062     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
// unsupported cases: defer to the full (slow) deblocking implementation
6066     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6067        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6068        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6069                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6070         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6073     assert(!FRAME_MBAFF);
// average each edge qp with the corresponding neighbour's qp
6075     mb_type = s->current_picture.mb_type[mb_xy];
6076     qp = s->current_picture.qscale_table[mb_xy];
6077     qp0 = s->current_picture.qscale_table[mb_xy-1];
6078     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6079     qpc = get_chroma_qp( h, 0, qp );
6080     qpc0 = get_chroma_qp( h, 0, qp0 );
6081     qpc1 = get_chroma_qp( h, 0, qp1 );
6082     qp0 = (qp + qp0 + 1) >> 1;
6083     qp1 = (qp + qp1 + 1) >> 1;
6084     qpc0 = (qpc + qpc0 + 1) >> 1;
6085     qpc1 = (qpc + qpc1 + 1) >> 1;
6086     qp_thresh = 15 - h->slice_alpha_c0_offset;
// below this threshold the filter would change nothing: skip the MB entirely
6087     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6088        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// intra MB: fixed strengths (4 on the MB border, 3 inside; 3 on the top
// border of field pictures)
6091     if( IS_INTRA(mb_type) ) {
6092         int16_t bS4[4] = {4,4,4,4};
6093         int16_t bS3[4] = {3,3,3,3};
6094         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6095         if( IS_8x8DCT(mb_type) ) {
6096             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6097             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6098             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6099             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6101             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6102             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6103             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6104             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6105             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6106             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6107             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6108             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6110         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6111         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6112         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6113         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6114         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6115         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6116         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6117         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// inter MB: compute per-edge strengths (bSv aliases bS as packed uint64
// rows for cheap whole-edge tests and assignments)
6120         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6121         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6123         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6125             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6127             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6128                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6129             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6130                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6132             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6133             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6134             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6135                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
// intra neighbours force strength 4 (3 vertically in field pictures)
6137         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6138             bSv[0][0] = 0x0004000400040004ULL;
6139         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6140             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// apply one edge in direction dir (0 = vertical edges, 1 = horizontal);
// edge 0 uses the averaged cross-MB qp, inner edges the MB's own qp
6142 #define FILTER(hv,dir,edge)\
6143         if(bSv[dir][edge]) {\
6144             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6146                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6147                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6153         } else if( IS_8x8DCT(mb_type) ) {
6173 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6174 MpegEncContext * const s = &h->s;
6176 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6177 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6178 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6179 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6180 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6182 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6183 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6184 // how often to recheck mv-based bS when iterating between edges
6185 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6186 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6187 // how often to recheck mv-based bS when iterating along each edge
6188 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6190 if (first_vertical_edge_done) {
6194 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6197 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6198 && !IS_INTERLACED(mb_type)
6199 && IS_INTERLACED(mbm_type)
6201 // This is a special case in the norm where the filtering must
6202 // be done twice (one each of the field) even if we are in a
6203 // frame macroblock.
6205 static const int nnz_idx[4] = {4,5,6,3};
6206 unsigned int tmp_linesize = 2 * linesize;
6207 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6208 int mbn_xy = mb_xy - 2 * s->mb_stride;
6213 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6214 if( IS_INTRA(mb_type) ||
6215 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6216 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6218 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6219 for( i = 0; i < 4; i++ ) {
6220 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6221 mbn_nnz[nnz_idx[i]] != 0 )
6227 // Do not use s->qscale as luma quantizer because it has not the same
6228 // value in IPCM macroblocks.
6229 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6230 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6231 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6232 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6233 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6234 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6235 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6236 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6243 for( edge = start; edge < edges; edge++ ) {
6244 /* mbn_xy: neighbor macroblock */
6245 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6246 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6247 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6251 if( (edge&1) && IS_8x8DCT(mb_type) )
6254 if( IS_INTRA(mb_type) ||
6255 IS_INTRA(mbn_type) ) {
6258 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6259 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6268 bS[0] = bS[1] = bS[2] = bS[3] = value;
6273 if( edge & mask_edge ) {
6274 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6277 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6278 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6281 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6282 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6283 int bn_idx= b_idx - (dir ? 8:1);
6286 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6287 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6288 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6289 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6292 if(h->slice_type_nos == FF_B_TYPE && v){
6294 for( l = 0; !v && l < 2; l++ ) {
6296 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6297 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6298 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6302 bS[0] = bS[1] = bS[2] = bS[3] = v;
6308 for( i = 0; i < 4; i++ ) {
6309 int x = dir == 0 ? edge : i;
6310 int y = dir == 0 ? i : edge;
6311 int b_idx= 8 + 4 + x + 8*y;
6312 int bn_idx= b_idx - (dir ? 8:1);
6314 if( h->non_zero_count_cache[b_idx] |
6315 h->non_zero_count_cache[bn_idx] ) {
6321 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6322 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6323 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6324 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6330 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6332 for( l = 0; l < 2; l++ ) {
6334 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6335 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6336 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6345 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6350 // Do not use s->qscale as luma quantizer because it has not the same
6351 // value in IPCM macroblocks.
6352 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6353 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6354 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6355 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6357 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6358 if( (edge&1) == 0 ) {
6359 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6360 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6361 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6362 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6365 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6366 if( (edge&1) == 0 ) {
6367 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6368 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6369 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6370 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Apply the in-loop deblocking filter to one macroblock: luma plus both
// chroma planes. Handles the low-QP early-skip estimate, the CAVLC-8x8dct
// non_zero_count fixup, and the special MBAFF first vertical edge, then
// delegates the remaining edges to filter_mb_dir() for both directions.
// NOTE(review): several interior lines of this function are not visible in
// this excerpt; comments describe only what the visible code shows.
6376 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6377 MpegEncContext * const s = &h->s;
6378 const int mb_xy= mb_x + mb_y*s->mb_stride;
6379 const int mb_type = s->current_picture.mb_type[mb_xy];
// Interlaced MBs use a tighter vertical-MV threshold (2 = half pel in field
// units) when deciding boundary strength from motion-vector differences.
6380 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6381 int first_vertical_edge_done = 0;
6384 //for sufficiently low qp, filtering wouldn't do anything
6385 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6387 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6388 int qp = s->current_picture.qscale_table[mb_xy];
// NOTE(review): the opening of the skip-check condition is not visible here;
// these clauses compare the averaged edge QP (current/left, current/top)
// against qp_thresh to decide whether filtering can be skipped entirely.
6390 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6391 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6396 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6397 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6398 int top_type, left_type[2];
6399 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6400 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6401 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// For 8x8-DCT neighbors, rebuild the top row of the nnz cache from the
// neighbor's coded-block-pattern bits (one cbp bit per 8x8 luma block).
6403 if(IS_8x8DCT(top_type)){
6404 h->non_zero_count_cache[4+8*0]=
6405 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6406 h->non_zero_count_cache[6+8*0]=
6407 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6409 if(IS_8x8DCT(left_type[0])){
6410 h->non_zero_count_cache[3+8*1]=
6411 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6413 if(IS_8x8DCT(left_type[1])){
6414 h->non_zero_count_cache[3+8*3]=
6415 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// Current MB with 8x8 DCT: each cbp bit drives the four 4x4 entries of the
// corresponding 8x8 luma quadrant in the nnz cache.
6418 if(IS_8x8DCT(mb_type)){
6419 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6420 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6422 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6423 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6425 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6426 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6428 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6429 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
// MBAFF special case: the left neighbor pair has a different frame/field
// type than the current MB, so the first vertical edge needs 8 boundary
// strengths and per-pair QPs instead of the usual 4/1.
6434 // left mb is in picture
6435 && h->slice_table[mb_xy-1] != 0xFFFF
6436 // and current and left pair do not have the same interlaced type
6437 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6438 // and left mb is in the same slice if deblocking_filter == 2
6439 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6440 /* First vertical edge is different in MBAFF frames
6441 * There are 8 different bS to compute and 2 different Qp
6443 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6444 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6449 int mb_qp, mbn0_qp, mbn1_qp;
6451 first_vertical_edge_done = 1;
6453 if( IS_INTRA(mb_type) )
6454 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6456 for( i = 0; i < 8; i++ ) {
// Field MB: first 4 strengths face the top MB of the left pair, last 4 the
// bottom; frame MB: alternate between the two left-pair MBs per row.
6457 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6459 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6461 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6462 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6463 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6465 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Average the luma/chroma QPs of the current MB with each left-pair MB to
// get per-pair filter QPs (qp/bqp/rqp = luma, Cb, Cr).
6472 mb_qp = s->current_picture.qscale_table[mb_xy];
6473 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6474 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6475 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6476 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6477 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6478 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6479 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6480 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6481 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6482 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6483 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6484 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6487 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6488 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6489 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6490 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6491 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Filter remaining edges: dir 0 = vertical edges, dir 1 = horizontal edges.
// The first vertical edge is suppressed if it was already handled above.
6495 for( dir = 0; dir < 2; dir++ )
6496 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6498 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6499 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
// Decode the macroblocks of one slice. Runs as a thread-pool callback
// (arg is a H264Context**). Three decode paths are visible: CABAC,
// CAVLC, and a partitioned-frame fallback using decode_mb(); each loop
// reports decoded/errored MB ranges to the error resilience layer via
// ff_er_add_slice(). Returns 0 on slice end, -1 on error (final
// "return -1" is marked unreachable).
// NOTE(review): interior lines are elided in this excerpt.
6503 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6504 H264Context *h = *(void**)arg;
6505 MpegEncContext * const s = &h->s;
// Partitioned frames only report end/error of the AC partition.
6506 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6510 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6511 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6513 if( h->pps.cabac ) {
// CABAC decoding starts on a byte boundary (cabac_alignment_one_bit).
6517 align_get_bits( &s->gb );
6520 ff_init_cabac_states( &h->cabac);
6521 ff_init_cabac_decoder( &h->cabac,
6522 s->gb.buffer + get_bits_count(&s->gb)/8,
6523 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6524 /* calculate pre-state */
// Initialize all 460 CABAC context states from the spec init tables,
// scaled by the slice QP; I slices and P/B slices (per cabac_init_idc)
// use different tables.
6525 for( i= 0; i < 460; i++ ) {
6527 if( h->slice_type_nos == FF_I_TYPE )
6528 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6530 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
// Pack state as (probability index, MPS bit) in one byte.
6533 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6535 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6540 int ret = decode_mb_cabac(h);
6542 //STOP_TIMER("decode_mb_cabac")
6544 if(ret>=0) hl_decode_mb(h);
// MBAFF: decode the bottom MB of the pair immediately after the top one.
6546 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6549 if(ret>=0) ret = decode_mb_cabac(h);
6551 if(ret>=0) hl_decode_mb(h);
6554 eos = get_cabac_terminate( &h->cabac );
// Bail out on MB decode error or CABAC bytestream overread (>2 bytes).
6556 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6557 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6558 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6562 if( ++s->mb_x >= s->mb_width ) {
6564 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6566 if(FIELD_OR_MBAFF_PICTURE) {
6571 if( eos || s->mb_y >= s->mb_height ) {
6572 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6573 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC path: same MB loop structure, bit-reader driven.
6580 int ret = decode_mb_cavlc(h);
6582 if(ret>=0) hl_decode_mb(h);
6584 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6586 ret = decode_mb_cavlc(h);
6588 if(ret>=0) hl_decode_mb(h);
6593 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6594 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6599 if(++s->mb_x >= s->mb_width){
6601 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6603 if(FIELD_OR_MBAFF_PICTURE) {
6606 if(s->mb_y >= s->mb_height){
6607 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Slice ends cleanly only if the bit reader landed exactly on the end.
6609 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6610 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6614 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// Pending skip-run MBs must be flushed before declaring the slice done.
6621 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6622 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6623 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6624 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6628 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// Partitioned-frame fallback loop using the generic decode_mb().
6637 for(;s->mb_y < s->mb_height; s->mb_y++){
6638 for(;s->mb_x < s->mb_width; s->mb_x++){
6639 int ret= decode_mb(h);
6644 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6645 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6650 if(++s->mb_x >= s->mb_width){
6652 if(++s->mb_y >= s->mb_height){
6653 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6657 ff_er_add_slice is reported with MV_END below for the in-progress row.
6658 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// NOTE(review): the next line is textually corrupted in this copy
// ("s->?gb", "gb?."); by analogy with the parallel checks above it
// presumably reads get_bits_count(&s->gb) >= s->gb.size_in_bits —
// restore from upstream before building.
6665 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6666 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6667 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6671 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6678 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6681 return -1; //not reached
// Parse a picture-timing SEI message: skips HRD cpb/dpb delays when HRD
// parameters are present, then reads pic_struct and (optionally) up to
// num_clock_ts clock timestamps, skipping all timestamp fields.
// NOTE(review): interior lines (including the value-storing/return paths)
// are elided in this excerpt.
6684 static int decode_picture_timing(H264Context *h){
6685 MpegEncContext * const s = &h->s;
6686 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6687 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6688 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6690 if(h->sps.pic_struct_present_flag){
6691 unsigned int i, num_clock_ts;
6692 h->sei_pic_struct = get_bits(&s->gb, 4);
// Values above FRAME_TRIPLING are reserved by the spec; reject them.
6694 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6697 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6699 for (i = 0 ; i < num_clock_ts ; i++){
6700 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6701 unsigned int full_timestamp_flag;
6702 skip_bits(&s->gb, 2); /* ct_type */
6703 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6704 skip_bits(&s->gb, 5); /* counting_type */
6705 full_timestamp_flag = get_bits(&s->gb, 1);
6706 skip_bits(&s->gb, 1); /* discontinuity_flag */
6707 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6708 skip_bits(&s->gb, 8); /* n_frames */
6709 if(full_timestamp_flag){
6710 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6711 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6712 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// Without a full timestamp, each component is individually flagged.
6714 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6715 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6716 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6717 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6718 if(get_bits(&s->gb, 1)) /* hours_flag */
6719 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6723 if(h->sps.time_offset_length > 0)
6724 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
// Parse an unregistered-user-data SEI payload of 'size' bytes. Reads up to
// sizeof(user_data)-1 bytes into a local buffer (16-byte UUID + message),
// detects the x264 encoder version string to set h->x264_build (used for
// encoder bug workarounds), and skips any remaining payload bytes.
6731 static int decode_unregistered_user_data(H264Context *h, int size){
6732 MpegEncContext * const s = &h->s;
6733 uint8_t user_data[16+256];
6739 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6740 user_data[i]= get_bits(&s->gb, 8);
// The message text starts after the 16-byte UUID; sscanf extracts the
// x264 core build number if present.
6744 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6745 if(e==1 && build>=0)
6746 h->x264_build= build;
6748 if(s->avctx->debug & FF_DEBUG_BUGS)
6749 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Consume any payload bytes beyond what fit in the local buffer.
6752 skip_bits(&s->gb, 8);
// Parse SEI messages from the current NAL: each message has a type and a
// size, both coded as runs of 0xFF bytes plus a final byte (ff_byte
// escaping per the spec). Known payloads (picture timing, unregistered
// user data) are decoded; everything else is skipped by size.
6757 static int decode_sei(H264Context *h){
6758 MpegEncContext * const s = &h->s;
// Need at least 16 bits (type byte + size byte) for another message.
6760 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate type: each 0xFF byte adds 255, the final byte terminates.
6765 type+= show_bits(&s->gb, 8);
6766 }while(get_bits(&s->gb, 8) == 255);
// Same accumulation scheme for the payload size (in bytes).
6770 size+= show_bits(&s->gb, 8);
6771 }while(get_bits(&s->gb, 8) == 255);
6774 case 1: // Picture timing SEI
6775 if(decode_picture_timing(h) < 0)
6779 if(decode_unregistered_user_data(h, size) < 0)
6783 skip_bits(&s->gb, 8*size);
6786 //FIXME check bits here
6787 align_get_bits(&s->gb);
// Parse the hrd_parameters() syntax structure of a VUI: CPB count and
// per-CPB rate/size/cbr fields are read and discarded; the delay-field
// bit lengths needed later by the picture-timing SEI are stored in sps.
6793 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6794 MpegEncContext * const s = &h->s;
6796 cpb_count = get_ue_golomb(&s->gb) + 1;
// cpb_cnt_minus1 must be in 0..31 per the spec; reject larger values.
6798 if(cpb_count > 32U){
6799 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6803 get_bits(&s->gb, 4); /* bit_rate_scale */
6804 get_bits(&s->gb, 4); /* cpb_size_scale */
6805 for(i=0; i<cpb_count; i++){
6806 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6807 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6808 get_bits1(&s->gb); /* cbr_flag */
// These lengths control how many bits the picture-timing SEI skips.
6810 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6811 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6812 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6813 sps->time_offset_length = get_bits(&s->gb, 5);
// Parse the vui_parameters() structure of an SPS: sample aspect ratio,
// video signal description (skipped), timing info, NAL/VCL HRD
// parameters, pic_struct flag, and the bitstream restriction block
// (num_reorder_frames is the only value kept from the latter).
6817 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6818 MpegEncContext * const s = &h->s;
6819 int aspect_ratio_info_present_flag;
6820 unsigned int aspect_ratio_idc;
6822 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6824 if( aspect_ratio_info_present_flag ) {
6825 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: explicit 16-bit numerator/denominator follow; otherwise
// the idc indexes the predefined pixel_aspect table.
6826 if( aspect_ratio_idc == EXTENDED_SAR ) {
6827 sps->sar.num= get_bits(&s->gb, 16);
6828 sps->sar.den= get_bits(&s->gb, 16);
6829 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6830 sps->sar= pixel_aspect[aspect_ratio_idc];
6832 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6839 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6841 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6842 get_bits1(&s->gb); /* overscan_appropriate_flag */
// Video signal type / colour description: read and discarded.
6845 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6846 get_bits(&s->gb, 3); /* video_format */
6847 get_bits1(&s->gb); /* video_full_range_flag */
6848 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6849 get_bits(&s->gb, 8); /* colour_primaries */
6850 get_bits(&s->gb, 8); /* transfer_characteristics */
6851 get_bits(&s->gb, 8); /* matrix_coefficients */
6855 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6856 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6857 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6860 sps->timing_info_present_flag = get_bits1(&s->gb);
6861 if(sps->timing_info_present_flag){
6862 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6863 sps->time_scale = get_bits_long(&s->gb, 32);
6864 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6867 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6868 if(sps->nal_hrd_parameters_present_flag)
6869 if(decode_hrd_parameters(h, sps) < 0)
6871 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6872 if(sps->vcl_hrd_parameters_present_flag)
6873 if(decode_hrd_parameters(h, sps) < 0)
6875 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6876 get_bits1(&s->gb); /* low_delay_hrd_flag */
6877 sps->pic_struct_present_flag = get_bits1(&s->gb);
6879 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6880 if(sps->bitstream_restriction_flag){
6881 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6882 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6883 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6884 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6885 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// num_reorder_frames bounds output reordering; values > 16 are invalid
// (the DPB holds at most 16 frames).
6886 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6887 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6889 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6890 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
// Parse one scaling list of 'size' (16 or 64) entries. If the list is not
// present in the bitstream, copy fallback_list (the spec's inferred list);
// if the first delta yields zero, copy jvt_list (the default list).
// Otherwise each factor is delta-coded from the previous value in zigzag
// scan order, with a run of the last value once a delta reaches zero.
6898 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6899 const uint8_t *jvt_list, const uint8_t *fallback_list){
6900 MpegEncContext * const s = &h->s;
6901 int i, last = 8, next = 8;
// 4x4 lists use the 4x4 zigzag, 8x8 lists the 8x8 zigzag.
6902 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6903 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6904 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6906 for(i=0;i<size;i++){
// Deltas are signed Exp-Golomb, accumulated modulo 256.
6908 next = (last + get_se_golomb(&s->gb)) & 0xff;
6909 if(!i && !next){ /* matrix not written, we use the preset one */
6910 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next==0 means "repeat last value for the rest of the list".
6913 last = factors[scan[i]] = next ? next : last;
// Parse the full set of scaling matrices for an SPS (is_sps != 0) or PPS.
// Fallback rules follow the spec: a PPS falls back to the SPS matrices if
// the SPS transmitted any, otherwise to the flat defaults; within a set,
// each chroma list falls back to the previously decoded list.
6917 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6918 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6919 MpegEncContext * const s = &h->s;
6920 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6921 const uint8_t *fallback[4] = {
6922 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6923 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6924 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6925 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// seq/pic_scaling_matrix_present_flag gates the whole structure.
6927 if(get_bits1(&s->gb)){
6928 sps->scaling_matrix_present |= is_sps;
6929 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6930 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6931 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6932 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6933 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6934 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only when the 8x8 transform can be used.
6935 if(is_sps || pps->transform_8x8_mode){
6936 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6937 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// Parse a sequence parameter set NAL into a freshly allocated SPS and
// store it in h->sps_buffers[sps_id] (replacing any previous one).
// Validates sps_id, POC parameters, reference frame count, and picture
// dimensions along the way.
// NOTE(review): interior lines (error returns, some closing braces) are
// elided in this excerpt.
6942 static inline int decode_seq_parameter_set(H264Context *h){
6943 MpegEncContext * const s = &h->s;
6944 int profile_idc, level_idc;
6945 unsigned int sps_id;
6949 profile_idc= get_bits(&s->gb, 8);
6950 get_bits1(&s->gb); //constraint_set0_flag
6951 get_bits1(&s->gb); //constraint_set1_flag
6952 get_bits1(&s->gb); //constraint_set2_flag
6953 get_bits1(&s->gb); //constraint_set3_flag
6954 get_bits(&s->gb, 4); // reserved
6955 level_idc= get_bits(&s->gb, 8);
6956 sps_id= get_ue_golomb(&s->gb);
6958 if(sps_id >= MAX_SPS_COUNT) {
6959 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6962 sps= av_mallocz(sizeof(SPS));
6966 sps->profile_idc= profile_idc;
6967 sps->level_idc= level_idc;
// Default scaling matrices are flat (all 16) until overridden below.
6969 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6970 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6971 sps->scaling_matrix_present = 0;
6973 if(sps->profile_idc >= 100){ //high profile
6974 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6975 if(sps->chroma_format_idc == 3)
6976 get_bits1(&s->gb); //residual_color_transform_flag
6977 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6978 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6979 sps->transform_bypass = get_bits1(&s->gb);
6980 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// Non-high profiles are always 4:2:0.
6982 sps->chroma_format_idc= 1;
6985 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6986 sps->poc_type= get_ue_golomb(&s->gb);
6988 if(sps->poc_type == 0){ //FIXME #define
6989 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6990 } else if(sps->poc_type == 1){//FIXME #define
6991 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6992 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6993 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6994 sps->poc_cycle_length = get_ue_golomb(&s->gb);
// Bound poc_cycle_length by the fixed offset_for_ref_frame array size.
6996 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6997 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7001 for(i=0; i<sps->poc_cycle_length; i++)
7002 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7003 }else if(sps->poc_type != 2){
7004 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7008 sps->ref_frame_count= get_ue_golomb(&s->gb);
7009 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7010 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7013 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7014 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7015 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guard against integer overflow before the 16x pixel-size check.
7016 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7017 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7018 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7022 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7023 if(!sps->frame_mbs_only_flag)
7024 sps->mb_aff= get_bits1(&s->gb);
7028 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7030 #ifndef ALLOW_INTERLACE
7032 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7034 sps->crop= get_bits1(&s->gb);
7036 sps->crop_left = get_ue_golomb(&s->gb);
7037 sps->crop_right = get_ue_golomb(&s->gb);
7038 sps->crop_top = get_ue_golomb(&s->gb);
7039 sps->crop_bottom= get_ue_golomb(&s->gb);
7040 if(sps->crop_left || sps->crop_top){
7041 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7043 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7044 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7050 sps->crop_bottom= 0;
7053 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7054 if( sps->vui_parameters_present_flag )
7055 decode_vui_parameters(h, sps);
7057 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7058 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7059 sps_id, sps->profile_idc, sps->level_idc,
7061 sps->ref_frame_count,
7062 sps->mb_width, sps->mb_height,
7063 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7064 sps->direct_8x8_inference_flag ? "8B8" : "",
7065 sps->crop_left, sps->crop_right,
7066 sps->crop_top, sps->crop_bottom,
7067 sps->vui_parameters_present_flag ? "VUI" : "",
7068 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previously stored SPS with the same id.
7071 av_free(h->sps_buffers[sps_id]);
7072 h->sps_buffers[sps_id]= sps;
// Build the per-PPS luma-QP -> chroma-QP lookup table for chroma plane t
// (0 = Cb, 1 = Cr), applying the plane's chroma_qp_index_offset ('index')
// and clipping the shifted QP to the valid 0..51 range before mapping it
// through the spec's chroma_qp table.
// NOTE(review): the return-type line preceding this one is not visible in
// this excerpt.
7080 build_qp_table(PPS *pps, int t, int index)
7083 for(i = 0; i < 52; i++)
7084 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
// Parse a picture parameter set NAL (bit_length bits long) into a freshly
// allocated PPS and store it in h->pps_buffers[pps_id]. Validates pps_id,
// sps_id, and reference counts; FMO (slice_group_count > 1) is parsed but
// not supported. Trailing high-profile fields (transform_8x8_mode, second
// scaling matrices, second chroma QP offset) are only read if bits remain.
// NOTE(review): interior lines (error returns, some braces) are elided in
// this excerpt.
7087 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7088 MpegEncContext * const s = &h->s;
7089 unsigned int pps_id= get_ue_golomb(&s->gb);
7092 if(pps_id >= MAX_PPS_COUNT) {
7093 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7097 pps= av_mallocz(sizeof(PPS));
7100 pps->sps_id= get_ue_golomb(&s->gb);
// The referenced SPS must already have been decoded.
7101 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7102 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7106 pps->cabac= get_bits1(&s->gb);
7107 pps->pic_order_present= get_bits1(&s->gb);
7108 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7109 if(pps->slice_group_count > 1 ){
7110 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7111 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
// The commented table below reproduces the FMO syntax from the spec for
// the unimplemented map types.
7112 switch(pps->mb_slice_group_map_type){
7115 |       for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |        |
7116 |           run_length[ i ]                             |1  |ue(v)   |
7121 |       for( i = 0; i < num_slice_groups_minus1; i++ )  |   |        |
7123 |           top_left_mb[ i ]                            |1  |ue(v)   |
7124 |           bottom_right_mb[ i ]                        |1  |ue(v)   |
7132 |       slice_group_change_direction_flag               |1  |u(1)    |
7133 |       slice_group_change_rate_minus1                  |1  |ue(v)   |
7138 |       slice_group_id_cnt_minus1                       |1  |ue(v)   |
7139 |       for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |        |
7141 |           slice_group_id[ i ]                         |1  |u(v)    |
7146 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7147 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7148 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7149 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7153 pps->weighted_pred= get_bits1(&s->gb);
7154 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7155 pps->init_qp= get_se_golomb(&s->gb) + 26;
7156 pps->init_qs= get_se_golomb(&s->gb) + 26;
7157 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7158 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7159 pps->constrained_intra_pred= get_bits1(&s->gb);
7160 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7162 pps->transform_8x8_mode= 0;
7163 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; the optional PPS matrices below
// may override them.
7164 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7165 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// High-profile PPS extension fields exist only if bits remain in the NAL.
7167 if(get_bits_count(&s->gb) < bit_length){
7168 pps->transform_8x8_mode= get_bits1(&s->gb);
7169 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7170 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7172 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7175 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7176 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7177 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7178 h->pps.chroma_qp_diff= 1;
7180 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7181 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7182 pps_id, pps->sps_id,
7183 pps->cabac ? "CABAC" : "CAVLC",
7184 pps->slice_group_count,
7185 pps->ref_count[0], pps->ref_count[1],
7186 pps->weighted_pred ? "weighted" : "",
7187 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7188 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7189 pps->constrained_intra_pred ? "CONSTR" : "",
7190 pps->redundant_pic_cnt_present ? "REDU" : "",
7191 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7195 av_free(h->pps_buffers[pps_id]);
7196 h->pps_buffers[pps_id]= pps;
7204 * Call decode_slice() for each context.
7206 * @param h h264 master context
7207 * @param context_count number of contexts to execute
7209 static void execute_decode_slices(H264Context *h, int context_count){
7210 MpegEncContext * const s = &h->s;
7211 AVCodecContext * const avctx= s->avctx;
// Single context: decode directly on this thread, no dispatch overhead.
7215 if(context_count == 1) {
7216 decode_slice(avctx, &h);
// Multiple contexts: propagate error settings to the worker contexts,
// run them via avctx->execute, then merge results back into the master.
7218 for(i = 1; i < context_count; i++) {
7219 hx = h->thread_context[i];
7220 hx->s.error_recognition = avctx->error_recognition;
7221 hx->s.error_count = 0;
7224 avctx->execute(avctx, (void *)decode_slice,
7225 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7227 /* pull back stuff from slices to master context */
// The last context holds the final MB position / picture state.
7228 hx = h->thread_context[context_count - 1];
7229 s->mb_x = hx->s.mb_x;
7230 s->mb_y = hx->s.mb_y;
7231 s->dropable = hx->s.dropable;
7232 s->picture_structure = hx->s.picture_structure;
7233 for(i = 1; i < context_count; i++)
7234 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each to the proper
 * parser (slice / DPA-DPB-DPC partitions / SPS / PPS / etc.), batching
 * slices across thread contexts and flushing them via
 * execute_decode_slices().
 *
 * NOTE(review): elided excerpt — interior lines (local declarations such as
 * buf_index/nalsize/i, several braces, case labels like NAL_SLICE/NAL_SPS,
 * and error paths) fall in gaps between the embedded original line numbers.
 */
7239 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7240 MpegEncContext * const s = &h->s;
7241 AVCodecContext * const avctx= s->avctx;
7243 H264Context *hx; ///< thread context
7244 int context_count = 0;
7246 h->max_contexts = avctx->thread_count;
/* Debug hex dump of the first bytes of the buffer (presumably guarded by a
 * debug condition in an elided line — confirm in full source). */
7249 for(i=0; i<50; i++){
7250 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CODEC_FLAG2_CHUNKS the whole access unit is in this buffer:
 * restart slice counting and, unless we are waiting for a second field,
 * drop the current picture pointer. */
7253 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7254 h->current_slice = 0;
7255 if (!s->first_field)
7256 s->current_picture_ptr= NULL;
7268 if(buf_index >= buf_size) break;
/* AVC (length-prefixed) mode: read the big-endian NAL size,
 * h->nal_length_size bytes wide. */
7270 for(i = 0; i < h->nal_length_size; i++)
7271 nalsize = (nalsize << 8) | buf[buf_index++];
7272 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7277 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7282 // start code prefix search
7283 for(; buf_index + 3 < buf_size; buf_index++){
7284 // This should always succeed in the first iteration.
7285 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7289 if(buf_index+3 >= buf_size) break;
/* Parse the escaped NAL payload into rbsp form on the next free thread
 * context. */
7294 hx = h->thread_context[context_count];
7296 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7297 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then compute the exact payload bit length
 * from the rbsp trailing bits. */
7300 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7302 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7304 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7305 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7308 if (h->is_avc && (nalsize != consumed)){
7309 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7313 buf_index += consumed;
/* Skip non-reference NALs when the caller requested frame skipping. */
7315 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7316 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7321 switch(hx->nal_unit_type){
7323 if (h->nal_unit_type != NAL_IDR_SLICE) {
7324 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7327 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7329 init_get_bits(&hx->s.gb, ptr, bit_length);
7331 hx->inter_gb_ptr= &hx->s.gb;
7332 hx->s.data_partitioning = 0;
7334 if((err = decode_slice_header(hx, h)))
7337 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Only queue the slice for decoding if it survives all skip filters. */
7338 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7339 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7340 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7341 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7342 && avctx->skip_frame < AVDISCARD_ALL)
/* Data-partitioned slices: partition A carries the slice header ... */
7346 init_get_bits(&hx->s.gb, ptr, bit_length);
7348 hx->inter_gb_ptr= NULL;
7349 hx->s.data_partitioning = 1;
7351 err = decode_slice_header(hx, h);
/* ... partition B feeds the intra bit reader ... */
7354 init_get_bits(&hx->intra_gb, ptr, bit_length);
7355 hx->intra_gb_ptr= &hx->intra_gb;
/* ... and partition C feeds the inter bit reader. */
7358 init_get_bits(&hx->inter_gb, ptr, bit_length);
7359 hx->inter_gb_ptr= &hx->inter_gb;
7361 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7362 && s->context_initialized
7364 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7365 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7366 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7367 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI / SPS / PPS NALs are parsed on the master context. */
7371 init_get_bits(&s->gb, ptr, bit_length);
7375 init_get_bits(&s->gb, ptr, bit_length);
7376 decode_seq_parameter_set(h);
7378 if(s->flags& CODEC_FLAG_LOW_DELAY)
7381 if(avctx->has_b_frames < 2)
7382 avctx->has_b_frames= !s->low_delay;
7385 init_get_bits(&s->gb, ptr, bit_length);
7387 decode_picture_parameter_set(h, bit_length);
7391 case NAL_END_SEQUENCE:
7392 case NAL_END_STREAM:
7393 case NAL_FILLER_DATA:
7395 case NAL_AUXILIARY_SLICE:
7398 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Once all thread contexts hold a slice, run the batch. */
7401 if(context_count == h->max_contexts) {
7402 execute_decode_slices(h, context_count);
7407 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7409 /* Slice could not be decoded in parallel mode, copy down
7410 * NAL unit stuff to context 0 and restart. Note that
7411 * rbsp_buffer is not transferred, but since we no longer
7412 * run in parallel mode this should not be an issue. */
7413 h->nal_unit_type = hx->nal_unit_type;
7414 h->nal_ref_idc = hx->nal_ref_idc;
/* Flush any slices still queued when the buffer is exhausted. */
7420 execute_decode_slices(h, context_count);
7425 * returns the number of bytes consumed for building the current frame
/* NOTE(review): the else-branch and return statement of this function fall in
 * an elided gap after line 7429; only the input-sanitizing clamps are shown. */
7427 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7428 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7429 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode entry point: parse one input packet, handle avcC
 * extradata on first use, decode the contained NAL units, then reorder and
 * emit a picture (delayed-picture buffer) into *pict.
 *
 * NOTE(review): elided excerpt — braces, local declarations (out, i,
 * buf_index, out_idx, …) and several statements fall in gaps between the
 * embedded original line numbers; do not assume adjacent lines are
 * contiguous in the full source.
 */
7434 static int decode_frame(AVCodecContext *avctx,
7435 void *data, int *data_size,
7436 const uint8_t *buf, int buf_size)
7438 H264Context *h = avctx->priv_data;
7439 MpegEncContext *s = &h->s;
7440 AVFrame *pict = data;
7443 s->flags= avctx->flags;
7444 s->flags2= avctx->flags2;
7446 /* end of stream, output what is still in the buffers */
7447 if (buf_size == 0) {
7451 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC for output. */
7452 out = h->delayed_pic[0];
7454 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7455 if(h->delayed_pic[i]->poc < out->poc){
7456 out = h->delayed_pic[i];
/* Compact the delayed-picture list after removal. */
7460 for(i=out_idx; h->delayed_pic[i]; i++)
7461 h->delayed_pic[i] = h->delayed_pic[i+1];
7464 *data_size = sizeof(AVFrame);
7465 *pict= *(AVFrame*)out;
/* First packet of an AVC (MP4-style) stream: parse SPS/PPS out of the
 * avcC extradata record. */
7471 if(h->is_avc && !h->got_avcC) {
7472 int i, cnt, nalsize;
7473 unsigned char *p = avctx->extradata;
7474 if(avctx->extradata_size < 7) {
7475 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7479 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7482 /* sps and pps in the avcC always have length coded with 2 bytes,
7483 so put a fake nal_length_size = 2 while parsing them */
7484 h->nal_length_size = 2;
7485 // Decode sps from avcC
7486 cnt = *(p+5) & 0x1f; // Number of sps
7488 for (i = 0; i < cnt; i++) {
7489 nalsize = AV_RB16(p) + 2;
7490 if(decode_nal_units(h, p, nalsize) < 0) {
7491 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7496 // Decode pps from avcC
7497 cnt = *(p++); // Number of pps
7498 for (i = 0; i < cnt; i++) {
7499 nalsize = AV_RB16(p) + 2;
7500 if(decode_nal_units(h, p, nalsize) != nalsize) {
7501 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7506 // Now store the right NAL length size, which will be used to parse all other NALs
7507 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7508 // Do not reparse avcC
/* Annex-B streams may carry SPS/PPS in plain extradata instead. */
7512 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7513 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7518 buf_index=decode_nal_units(h, buf, buf_size);
7522 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7523 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7524 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A full picture finished (or CHUNKS mode reached the bottom MB row):
 * finalize it and run the output/reordering logic. */
7528 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7529 Picture *out = s->current_picture_ptr;
7530 Picture *cur = s->current_picture_ptr;
7531 int i, pics, cross_idr, out_of_order, out_idx;
7535 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7536 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply reference-picture marking (MMCO) and roll POC state forward. */
7539 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7540 h->prev_poc_msb= h->poc_msb;
7541 h->prev_poc_lsb= h->poc_lsb;
7543 h->prev_frame_num_offset= h->frame_num_offset;
7544 h->prev_frame_num= h->frame_num;
7547 * FIXME: Error handling code does not seem to support interlaced
7548 * when slices span multiple rows
7549 * The ff_er_add_slice calls don't work right for bottom
7550 * fields; they cause massive erroneous error concealing
7551 * Error marking covers both fields (top and bottom).
7552 * This causes a mismatched s->error_count
7553 * and a bad error table. Further, the error count goes to
7554 * INT_MAX when called for bottom field, because mb_y is
7555 * past end by one (callers fault) and resync_mb_y != 0
7556 * causes problems for the first MB line, too.
7563 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7564 /* Wait for second field. */
7568 cur->repeat_pict = 0;
7570 /* Signal interlacing information externally. */
7571 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7572 if(h->sps.pic_struct_present_flag){
7573 switch (h->sei_pic_struct)
7575 case SEI_PIC_STRUCT_FRAME:
7576 cur->interlaced_frame = 0;
7578 case SEI_PIC_STRUCT_TOP_FIELD:
7579 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7580 case SEI_PIC_STRUCT_TOP_BOTTOM:
7581 case SEI_PIC_STRUCT_BOTTOM_TOP:
7582 cur->interlaced_frame = 1;
7584 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7585 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7586 // Signal the possibility of telecined film externally (pic_struct 5,6)
7587 // From these hints, let the applications decide if they apply deinterlacing.
7588 cur->repeat_pict = 1;
7589 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7591 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7592 // Force progressive here, as doubling interlaced frame is a bad idea.
7593 cur->interlaced_frame = 0;
7594 cur->repeat_pict = 2;
7596 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7597 cur->interlaced_frame = 0;
7598 cur->repeat_pict = 4;
7602 /* Derive interlacing flag from used decoding process. */
7603 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7606 if (cur->field_poc[0] != cur->field_poc[1]){
7607 /* Derive top_field_first from field pocs. */
7608 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7610 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7611 /* Use picture timing SEI information. Even if it is information from a past frame, better than nothing. */
7612 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7613 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7614 cur->top_field_first = 1;
7616 cur->top_field_first = 0;
7618 /* Most likely progressive */
7619 cur->top_field_first = 0;
7623 //FIXME do something with unavailable reference frames
7625 /* Sort B-frames into display order */
7627 if(h->sps.bitstream_restriction_flag
7628 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7629 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Strict compliance without bitstream restriction info: assume worst-case
 * reordering depth. */
7633 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7634 && !h->sps.bitstream_restriction_flag){
7635 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7640 while(h->delayed_pic[pics]) pics++;
7642 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Append the finished picture; keep it referenced while it waits for
 * output (see DELAYED_PIC_REF). */
7644 h->delayed_pic[pics++] = cur;
7645 if(cur->reference == 0)
7646 cur->reference = DELAYED_PIC_REF;
7648 out = h->delayed_pic[0];
7650 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7651 if(h->delayed_pic[i]->poc < out->poc){
7652 out = h->delayed_pic[i];
7655 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7657 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow the assumed reorder depth when output would be out of order. */
7659 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7661 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7663 ((!cross_idr && out->poc > h->outputed_poc + 2)
7664 || cur->pict_type == FF_B_TYPE)))
7667 s->avctx->has_b_frames++;
7670 if(out_of_order || pics > s->avctx->has_b_frames){
7671 out->reference &= ~DELAYED_PIC_REF;
7672 for(i=out_idx; h->delayed_pic[i]; i++)
7673 h->delayed_pic[i] = h->delayed_pic[i+1];
7675 if(!out_of_order && pics > s->avctx->has_b_frames){
7676 *data_size = sizeof(AVFrame);
7678 h->outputed_poc = out->poc;
7679 *pict= *(AVFrame*)out;
7681 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7686 assert(pict->data[0] || !*data_size);
7687 ff_print_debug_info(s, pict);
7688 //printf("out %d\n", (int)pict->data[0]);
7691 /* Return the Picture timestamp as the frame number */
7692 /* we subtract 1 because it is added on utils.c */
7693 avctx->frame_number = s->picture_number - 1;
7695 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbour-availability flags for the current
 * macroblock: a neighbour is available when it exists inside the picture and
 * belongs to the same slice (same slice_table entry).
 * Index map per the assignments below: 0=top-left, 1=top, 2=top-right,
 * 3=left; 4/5 are constants pending a refactor (see FIXMEs).
 * NOTE(review): elided excerpt — the guard around the top-row checks
 * (original lines 7701-7702) and the closing brace are in gaps. */
7698 static inline void fill_mb_avail(H264Context *h){
7699 MpegEncContext * const s = &h->s;
7700 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7703 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7704 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7705 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7711 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7712 h->mb_avail[4]= 1; //FIXME move out
7713 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): this span appears to be the built-in self-test harness
 * (presumably guarded by an elided #ifdef TEST and wrapped in a main()
 * whose signature falls in a gap — confirm in full source). It exercises
 * the Exp-Golomb writer/reader, the 4x4 (I)DCT round-trip, the quantizer,
 * and the NAL escaping layer. Code is left byte-identical. */
7721 #define SIZE (COUNT*40)
7727 // int int_temp[10000];
7729 AVCodecContext avctx;
7731 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: write COUNT codes, then read them back and
 * compare against the loop index. --- */
7733 init_put_bits(&pb, temp, SIZE);
7734 printf("testing unsigned exp golomb\n");
7735 for(i=0; i<COUNT; i++){
7737 set_ue_golomb(&pb, i);
7738 STOP_TIMER("set_ue_golomb");
7740 flush_put_bits(&pb);
7742 init_get_bits(&gb, temp, 8*SIZE);
7743 for(i=0; i<COUNT; i++){
7746 s= show_bits(&gb, 24);
7749 j= get_ue_golomb(&gb);
7751 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7754 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: same round-trip, values centered on zero. --- */
7758 init_put_bits(&pb, temp, SIZE);
7759 printf("testing signed exp golomb\n");
7760 for(i=0; i<COUNT; i++){
7762 set_se_golomb(&pb, i - COUNT/2);
7763 STOP_TIMER("set_se_golomb");
7765 flush_put_bits(&pb);
7767 init_get_bits(&gb, temp, 8*SIZE);
7768 for(i=0; i<COUNT; i++){
7771 s= show_bits(&gb, 24);
7774 j= get_se_golomb(&gb);
7775 if(j != i - COUNT/2){
7776 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7779 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round-trip on random blocks, accumulating the error. --- */
7783 printf("testing 4x4 (I)DCT\n");
7786 uint8_t src[16], ref[16];
7787 uint64_t error= 0, max_error=0;
7789 for(i=0; i<COUNT; i++){
7791 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7792 for(j=0; j<16; j++){
7793 ref[j]= random()%255;
7794 src[j]= random()%255;
7797 h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients to compensate the transform's built-in gains
 * (presumably matching the reference quantisation — confirm). */
7800 for(j=0; j<16; j++){
7801 // printf("%d ", block[j]);
7802 block[j]= block[j]*4;
7803 if(j&1) block[j]= (block[j]*4 + 2)/5;
7804 if(j&4) block[j]= (block[j]*4 + 2)/5;
7808 s->dsp.h264_idct_add(ref, block, 4);
7809 /* for(j=0; j<16; j++){
7810 printf("%d ", ref[j]);
7814 for(j=0; j<16; j++){
7815 int diff= FFABS(src[j] - ref[j]);
7818 max_error= FFMAX(max_error, diff);
7821 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values on random blocks. --- */
7822 printf("testing quantizer\n");
7823 for(qp=0; qp<52; qp++){
7825 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape a random bitstream with encode_nal(), decode it
 * back with decode_nal(), and verify length, consumed bytes and content. --- */
7828 printf("Testing NAL layer\n");
7830 uint8_t bitstream[COUNT];
7831 uint8_t nal[COUNT*2];
7833 memset(&h, 0, sizeof(H264Context));
7835 for(i=0; i<COUNT; i++){
7843 for(j=0; j<COUNT; j++){
7844 bitstream[j]= (random() % 255) + 1;
/* Sprinkle zero bytes at random positions to exercise the 00 00 0x
 * emulation-prevention escaping. */
7847 for(j=0; j<zeros; j++){
7848 int pos= random() % COUNT;
7849 while(bitstream[pos] == 0){
7858 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7860 printf("encoding failed\n");
7864 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7868 if(out_length != COUNT){
7869 printf("incorrect length %d %d\n", out_length, COUNT);
7873 if(consumed != nal_length){
7874 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7878 if(memcmp(bitstream, out, COUNT)){
7879 printf("mismatch\n");
7885 printf("Testing RBSP\n");
/**
 * Codec close callback: release all per-decoder allocations — the two NAL
 * rbsp work buffers, the lookup tables, and every cached SPS/PPS.
 * NOTE(review): elided excerpt — the local declaration of i, the opening
 * brace and the trailing return fall in gaps.
 */
7893 static av_cold int decode_end(AVCodecContext *avctx)
7895 H264Context *h = avctx->priv_data;
7896 MpegEncContext *s = &h->s;
7899 av_freep(&h->rbsp_buffer[0]);
7900 av_freep(&h->rbsp_buffer[1]);
7901 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep() NULLs each slot, so stale SPS/PPS pointers cannot dangle. */
7903 for(i = 0; i < MAX_SPS_COUNT; i++)
7904 av_freep(h->sps_buffers + i);
7906 for(i = 0; i < MAX_PPS_COUNT; i++)
7907 av_freep(h->pps_buffers + i);
7911 // memset(h, 0, sizeof(H264Context));
7911 // memset(h, 0, sizeof(H264Context));
/* Public codec registration entry. NOTE(review): elided excerpt — the name,
 * type, id, init/close/decode callbacks fall in gaps; only priv_data_size,
 * capabilities and long_name are visible here. */
7917 AVCodec h264_decoder = {
7921 sizeof(H264Context),
/* CODEC_CAP_DELAY: decoder buffers pictures for reordering and must be
 * flushed with a zero-size packet at end of stream (see decode_frame). */
7926 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7928 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),