2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Static CAVLC decode tables for residual block parsing (H.264 subclause 9.2).
 * Each VLC struct indexes into its statically sized VLC_TYPE backing array;
 * the *_size constants record how many (code,bits) entries are reserved. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* Separate coeff_token code set used for chroma DC blocks. */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* 15 total_zeros tables (indexed by a total-coefficient context). */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
/* 3 total_zeros tables for the 2x2 chroma DC blocks. */
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before tables, plus one larger backing table below.
 * NOTE(review): the VLC struct declaration that owns run7_vlc_table is not
 * visible in this listing — presumably a "static VLC run7_vlc;" line was
 * dropped; confirm against upstream h264.c. */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for static helpers defined later in this file. */
/* SVQ3-specific dequant/IDCT variants (SVQ3 shares this decoder core). */
76 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
77 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* Per-macroblock in-loop deblocking filter: full and fast variants. */
78 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* Removes long-term reference i from the tables (reference management). */
80 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into one 32-bit word. Which argument ends up in
 * the high half depends on host byte order, so the pair reads back as two
 * adjacent int16_t values in memory order on either endianness.
 * NOTE(review): the #else / #endif lines of this conditional and the closing
 * brace are not visible in this listing — the two return statements below
 * are the big-endian and little-endian branches respectively.
 */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
/* Lookup tables for the H.264 QP range 0..51: rem6[q] == q % 6 and
 * div6[q] == q / 6 (values are visible below), precomputed so that the
 * dequantization code avoids run-time division and modulo.
 * NOTE(review): the closing "};" lines of both initializers are not visible
 * in this listing. */
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options[4][8]={
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 const int * left_block;
110 int topleft_partition= -1;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
139 if (!curr_mb_frame_flag && (bottom || !top_mb_frame_flag)){
140 top_xy -= s->mb_stride;
142 if (!curr_mb_frame_flag && (bottom || !topleft_mb_frame_flag)){
143 topleft_xy -= s->mb_stride;
144 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
145 topleft_xy += s->mb_stride;
146 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
147 topleft_partition = 0;
149 if (!curr_mb_frame_flag && (bottom || !topright_mb_frame_flag)){
150 topright_xy -= s->mb_stride;
152 if (left_mb_frame_flag != curr_mb_frame_flag) {
153 left_xy[1] = left_xy[0] = pair_xy - 1;
154 if (curr_mb_frame_flag) {
156 left_block = left_block_options[1];
158 left_block= left_block_options[2];
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
167 h->top_mb_xy = top_xy;
168 h->left_mb_xy[0] = left_xy[0];
169 h->left_mb_xy[1] = left_xy[1];
173 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
174 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
175 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
177 if(MB_MBAFF && !IS_INTRA(mb_type)){
179 for(list=0; list<h->list_count; list++){
180 //These values where changed for ease of performing MC, we need to change them back
181 //FIXME maybe we can make MC and loop filter use the same values or prevent
182 //the MC code from changing ref_cache and rather use a temporary array.
183 if(USES_LIST(mb_type,list)){
184 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
185 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
186 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
188 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
189 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
194 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
195 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
196 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
197 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
198 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
200 if(IS_INTRA(mb_type)){
201 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
202 h->topleft_samples_available=
203 h->top_samples_available=
204 h->left_samples_available= 0xFFFF;
205 h->topright_samples_available= 0xEEEA;
207 if(!(top_type & type_mask)){
208 h->topleft_samples_available= 0xB3FF;
209 h->top_samples_available= 0x33FF;
210 h->topright_samples_available= 0x26EA;
212 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
213 if(IS_INTERLACED(mb_type)){
214 if(!(left_type[0] & type_mask)){
215 h->topleft_samples_available&= 0xDFFF;
216 h->left_samples_available&= 0x5FFF;
218 if(!(left_type[1] & type_mask)){
219 h->topleft_samples_available&= 0xFF5F;
220 h->left_samples_available&= 0xFF5F;
223 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
224 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
225 assert(left_xy[0] == left_xy[1]);
226 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
227 h->topleft_samples_available&= 0xDF5F;
228 h->left_samples_available&= 0x5F5F;
232 if(!(left_type[0] & type_mask)){
233 h->topleft_samples_available&= 0xDF5F;
234 h->left_samples_available&= 0x5F5F;
238 if(!(topleft_type & type_mask))
239 h->topleft_samples_available&= 0x7FFF;
241 if(!(topright_type & type_mask))
242 h->topright_samples_available&= 0xFBFF;
244 if(IS_INTRA4x4(mb_type)){
245 if(IS_INTRA4x4(top_type)){
246 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
247 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
248 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
249 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 if(!(top_type & type_mask))
257 h->intra4x4_pred_mode_cache[4+8*0]=
258 h->intra4x4_pred_mode_cache[5+8*0]=
259 h->intra4x4_pred_mode_cache[6+8*0]=
260 h->intra4x4_pred_mode_cache[7+8*0]= pred;
263 if(IS_INTRA4x4(left_type[i])){
264 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
265 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 if(!(left_type[i] & type_mask))
273 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
274 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
290 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
292 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
293 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
294 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
295 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
297 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
298 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
300 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
301 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
304 h->non_zero_count_cache[4+8*0]=
305 h->non_zero_count_cache[5+8*0]=
306 h->non_zero_count_cache[6+8*0]=
307 h->non_zero_count_cache[7+8*0]=
309 h->non_zero_count_cache[1+8*0]=
310 h->non_zero_count_cache[2+8*0]=
312 h->non_zero_count_cache[1+8*3]=
313 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
317 for (i=0; i<2; i++) {
319 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
320 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
321 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
322 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
324 h->non_zero_count_cache[3+8*1 + 2*8*i]=
325 h->non_zero_count_cache[3+8*2 + 2*8*i]=
326 h->non_zero_count_cache[0+8*1 + 8*i]=
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
334 h->top_cbp = h->cbp_table[top_xy];
335 } else if(IS_INTRA(mb_type)) {
342 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
343 } else if(IS_INTRA(mb_type)) {
349 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
352 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
357 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
359 for(list=0; list<h->list_count; list++){
360 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
361 /*if(!h->mv_cache_clean[list]){
362 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
363 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
364 h->mv_cache_clean[list]= 1;
368 h->mv_cache_clean[list]= 0;
370 if(USES_LIST(top_type, list)){
371 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
372 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
377 h->ref_cache[list][scan8[0] + 0 - 1*8]=
378 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
379 h->ref_cache[list][scan8[0] + 2 - 1*8]=
380 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
386 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
390 int cache_idx = scan8[0] - 1 + i*2*8;
391 if(USES_LIST(left_type[i], list)){
392 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
393 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
394 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
395 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
396 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
397 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
399 *(uint32_t*)h->mv_cache [list][cache_idx ]=
400 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
401 h->ref_cache[list][cache_idx ]=
402 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
406 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 if(USES_LIST(topleft_type, list)){
410 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
411 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
412 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
413 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
415 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
416 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
419 if(USES_LIST(topright_type, list)){
420 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
421 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
422 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
423 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
425 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
426 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
429 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 h->ref_cache[list][scan8[5 ]+1] =
433 h->ref_cache[list][scan8[7 ]+1] =
434 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
435 h->ref_cache[list][scan8[4 ]] =
436 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
437 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
440 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
441 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
444 /* XXX beurk, Load mvd */
445 if(USES_LIST(top_type, list)){
446 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
457 if(USES_LIST(left_type[0], list)){
458 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
465 if(USES_LIST(left_type[1], list)){
466 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
473 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
476 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
477 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
479 if(h->slice_type_nos == FF_B_TYPE){
480 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
482 if(IS_DIRECT(top_type)){
483 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
484 }else if(IS_8X8(top_type)){
485 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
486 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
487 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
489 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
492 if(IS_DIRECT(left_type[0]))
493 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
494 else if(IS_8X8(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
497 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
499 if(IS_DIRECT(left_type[1]))
500 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
501 else if(IS_8X8(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
504 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
510 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
511 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
516 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
519 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
521 #define MAP_F2F(idx, mb_type)\
522 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
523 h->ref_cache[list][idx] <<= 1;\
524 h->mv_cache[list][idx][1] /= 2;\
525 h->mvd_cache[list][idx][1] /= 2;\
530 #define MAP_F2F(idx, mb_type)\
531 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
532 h->ref_cache[list][idx] >>= 1;\
533 h->mv_cache[list][idx][1] <<= 1;\
534 h->mvd_cache[list][idx][1] <<= 1;\
544 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the current macroblock's intra4x4 prediction modes from the
 * scan8-indexed cache back into the per-macroblock table
 * h->intra4x4_pred_mode[mb_xy]. Only 7 entries are stored — the cache
 * positions copied (7+8*1..7+8*4 and 4..6+8*4) are the ones neighbouring
 * macroblocks will later read as their top/left context.
 * NOTE(review): the closing brace of this function is not visible in this
 * listing.
 */
547 static inline void write_back_intra_pred_mode(H264Context *h){
548 const int mb_xy= h->mb_xy;
550 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
551 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
552 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
553 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
554 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
555 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
556 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
560 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
562 static inline int check_intra4x4_pred_mode(H264Context *h){
563 MpegEncContext * const s = &h->s;
564 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
565 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 if(!(h->top_samples_available&0x8000)){
570 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
572 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
580 if((h->left_samples_available&0x8888)!=0x8888){
581 static const int mask[4]={0x8000,0x2000,0x80,0x20};
583 if(!(h->left_samples_available&mask[i])){
584 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
596 } //FIXME cleanup like next
599 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
601 static inline int check_intra_pred_mode(H264Context *h, int mode){
602 MpegEncContext * const s = &h->s;
603 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
604 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
607 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
611 if(!(h->top_samples_available&0x8000)){
614 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
619 if((h->left_samples_available&0x8080) != 0x8080){
621 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
622 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
625 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 * gets the predicted intra4x4 prediction mode.
/**
 * Gets the predicted intra4x4 prediction mode for block n: the minimum of
 * the cached left and top neighbour modes, falling back to DC_PRED when a
 * neighbour is unavailable (negative cache value).
 * NOTE(review): the final "return min;" path and the closing brace are not
 * visible in this listing — confirm against upstream.
 */
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
/**
 * Writes the per-4x4-block non-zero coefficient counts of the current
 * macroblock from the scan8-indexed cache back into the frame-wide
 * h->non_zero_count[mb_xy] table (entries 0..12), where later macroblocks
 * read them as top/left neighbour context.
 * NOTE(review): the closing brace of this function is not visible in this
 * listing.
 */
648 static inline void write_back_non_zero_count(H264Context *h){
649 const int mb_xy= h->mb_xy;
651 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
652 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
653 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
654 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
655 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
656 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
657 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
659 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
660 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
661 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
663 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
664 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
665 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
669 * gets the predicted number of non-zero coefficients.
670 * @param n block index
/**
 * Gets the predicted number of non-zero coefficients for block n from the
 * cached left and top neighbour counts.
 * @param n block index
 * NOTE(review): the statement that initializes i (presumably
 * "int i= left + top;") and the return are not visible in this listing —
 * confirm against upstream. When both neighbours are available (i<64) the
 * prediction is their rounded average.
 */
672 static inline int pred_non_zero_count(H264Context *h, int n){
673 const int index8= scan8[n];
674 const int left= h->non_zero_count_cache[index8 - 1];
675 const int top = h->non_zero_count_cache[index8 - 8];
678 if(i<64) i= (i+1)>>1;
680 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
685 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
686 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
687 MpegEncContext *s = &h->s;
689 /* there is no consistent mapping of mvs to neighboring locations that will
690 * make mbaff happy, so we can't move all this logic to fill_caches */
692 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
694 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
695 *C = h->mv_cache[list][scan8[0]-2];
698 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
699 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
700 if(IS_INTERLACED(mb_types[topright_xy])){
701 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
702 const int x4 = X4, y4 = Y4;\
703 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
704 if(!USES_LIST(mb_type,list))\
705 return LIST_NOT_USED;\
706 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
707 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
708 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
709 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
711 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
714 if(topright_ref == PART_NOT_AVAILABLE
715 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
716 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
718 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
719 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
722 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
724 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
725 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
731 if(topright_ref != PART_NOT_AVAILABLE){
732 *C= h->mv_cache[list][ i - 8 + part_width ];
735 tprintf(s->avctx, "topright MV not available\n");
737 *C= h->mv_cache[list][ i - 8 - 1 ];
738 return h->ref_cache[list][ i - 8 - 1 ];
743 * gets the predicted MV.
744 * @param n the block index
745 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
746 * @param mx the x component of the predicted motion vector
747 * @param my the y component of the predicted motion vector
749 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
750 const int index8= scan8[n];
751 const int top_ref= h->ref_cache[list][ index8 - 8 ];
752 const int left_ref= h->ref_cache[list][ index8 - 1 ];
753 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
754 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
756 int diagonal_ref, match_count;
758 assert(part_width==1 || part_width==2 || part_width==4);
768 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
769 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
770 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
771 if(match_count > 1){ //most common
772 *mx= mid_pred(A[0], B[0], C[0]);
773 *my= mid_pred(A[1], B[1], C[1]);
774 }else if(match_count==1){
778 }else if(top_ref==ref){
786 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
790 *mx= mid_pred(A[0], B[0], C[0]);
791 *my= mid_pred(A[1], B[1], C[1]);
795 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
799 * gets the directionally predicted 16x8 MV.
800 * @param n the block index
801 * @param mx the x component of the predicted motion vector
802 * @param my the y component of the predicted motion vector
804 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
806 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
807 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
809 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
817 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
818 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
820 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 pred_motion(h, n, 4, list, ref, mx, my);
834 * gets the directionally predicted 8x16 MV.
835 * @param n the block index
836 * @param mx the x component of the predicted motion vector
837 * @param my the y component of the predicted motion vector
839 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
841 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
842 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
844 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
855 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
857 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
859 if(diagonal_ref == ref){
867 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the motion vector for a P-Skip macroblock: when either neighbour
 * is unavailable, or has reference 0 with a zero motion vector, the skip MV
 * is forced (branch body below); otherwise the regular median prediction for
 * a 16x16 partition with reference 0 is used.
 * NOTE(review): the body of the first branch (presumably "*mx = *my = 0;
 * return;") and the closing brace are not visible in this listing — confirm
 * against upstream.
 */
870 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
871 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
872 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
874 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
876 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
877 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
878 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
884 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct-mode distance scale factor for entry i of
 * reference list 0 (the tx / DistScaleFactor derivation of H.264 subclause
 * 8.4.1.2.3, with td and tb clipped to [-128,127]).
 * @param poc  POC of the current picture
 * @param poc1 POC of the list-1 co-located reference
 * @param i    index into h->ref_list[0]
 * NOTE(review): the body of the td==0 / long_ref special case and the
 * closing brace are not visible in this listing — the spec returns the
 * fixed factor 256 in that case; confirm against upstream.
 */
889 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
890 int poc0 = h->ref_list[0][i].poc;
891 int td = av_clip(poc1 - poc0, -128, 127);
892 if(td == 0 || h->ref_list[0][i].long_ref){
895 int tb = av_clip(poc - poc0, -128, 127);
896 int tx = (16384 + (FFABS(td) >> 1)) / td;
897 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
901 static inline void direct_dist_scale_factor(H264Context * const h){
902 MpegEncContext * const s = &h->s;
903 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
904 const int poc1 = h->ref_list[1][0].poc;
906 for(field=0; field<2; field++){
907 const int poc = h->s.current_picture_ptr->field_poc[field];
908 const int poc1 = h->ref_list[1][0].field_poc[field];
909 for(i=0; i < 2*h->ref_count[0]; i++)
910 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
913 for(i=0; i<h->ref_count[0]; i++){
914 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds map[list][] translating the colocated picture's (list1[0]) reference
 * indices into the current slice's list indices, matched by the packed poc
 * value 4*frame_num + (reference&3).  With mbafi set, entries for the MBAFF
 * field variants are written at offset 16 with parity folded in.
 * NOTE(review): several lines (the first poc-adjust branch before the
 * "else if", and closing braces) are elided from this view.
 */
918 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 int j, old_ref, rfield;
// Field-pair entries start at 16; frame entries start at 0.
922 int start= mbafi ? 16 : 0;
923 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
924 int interl= mbafi || s->picture_structure != PICT_FRAME;
926 /* bogus; fills in for missing frames */
927 memset(map[list], 0, sizeof(map[list]));
929 for(rfield=0; rfield<2; rfield++){
930 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
931 int poc = ref1->ref_poc[colfield][list][old_ref];
935 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
// Rewrite a frame poc as the field poc of the requested parity.
936 poc= (poc&~3) + rfield + 1;
938 for(j=start; j<end; j++){
// Match against the current list's packed frame_num/parity key.
939 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
940 int cur_ref= mbafi ? (j-16)^field : j;
941 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
943 map[list][old_ref] = cur_ref;
/**
 * Snapshot the current picture's reference counts and packed ref POCs
 * (4*frame_num + parity) so future pictures can resolve colocated refs,
 * then populate the map_col_to_list0 tables (frame + per-field variants)
 * used by temporal direct mode.
 * NOTE(review): loop-variable declarations and some closing braces are
 * elided from this view; the early return for non-B / spatial-direct slices
 * (line after the pict_type check) is also hidden.
 */
951 static inline void direct_ref_list_init(H264Context * const h){
952 MpegEncContext * const s = &h->s;
953 Picture * const ref1 = &h->ref_list[1][0];
954 Picture * const cur = s->current_picture_ptr;
// Field-parity index: 0 for top/frame structure slot, 1 for bottom.
956 int sidx= (s->picture_structure&1)^1;
957 int ref1sidx= (ref1->reference&1)^1;
959 for(list=0; list<2; list++){
960 cur->ref_count[sidx][list] = h->ref_count[list];
961 for(j=0; j<h->ref_count[list]; j++)
962 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// Frame pictures mirror the data into both parity slots.
965 if(s->picture_structure == PICT_FRAME){
966 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
967 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
970 cur->mbaff= FRAME_MBAFF;
// Only temporal-direct B slices need the colocated ref maps below.
972 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975 for(list=0; list<2; list++){
976 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
977 for(field=0; field<2; field++)
978 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for B-direct macroblocks
 * (and direct 8x8 sub-blocks when is_b8x8), writing into h->mv_cache /
 * h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Handles both spatial direct (h->direct_spatial_mv_pred) and temporal
 * direct, including the frame/field mismatch cases between the current MB
 * and the colocated MB of ref_list[1][0].
 * NOTE(review): this view is heavily elided (declarations of mb_type_col,
 * ref, mv, i8, ref_offset, several closing braces and else-branches are
 * hidden); comments below describe only the visible logic.
 */
982 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
983 MpegEncContext * const s = &h->s;
984 int b8_stride = h->b8_stride;
985 int b4_stride = h->b_stride;
986 int mb_xy = h->mb_xy;
988 const int16_t (*l1mv0)[2], (*l1mv1)[2];
989 const int8_t *l1ref0, *l1ref1;
990 const int is_b8x8 = IS_8X8(*mb_type);
991 unsigned int sub_mb_type;
994 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// --- Locate the colocated MB, compensating for frame/field structure ---
996 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
997 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
998 int cur_poc = s->current_picture_ptr->poc;
999 int *col_poc = h->ref_list[1]->field_poc;
// Pick the colocated field whose POC is closer to the current picture.
1000 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1003 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005 mb_xy += s->mb_stride*fieldoff;
1008 }else{ // AFL/AFR/FR/FL -> AFR/FR
1009 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// Field MB spans a frame MB pair: fetch both colocated frame MB types.
1010 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1015 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1019 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1022 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1025 }else{ // AFR/FR -> AFR/FR
1028 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
// Choose direct partition sizes from the colocated MB's partitioning.
1029 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030 /* FIXME save sub mb types from previous frames (or derive from MVs)
1031 * so we know exactly what block size to use */
1032 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1038 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// --- Pointers into the colocated picture's MV / ref-index planes ---
1044 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// Advance to the bottom half (condition guarding this is elided in this view).
1050 l1ref0 += h->b8_stride;
1051 l1ref1 += h->b8_stride;
1052 l1mv0 += 2*b4_stride;
1053 l1mv1 += 2*b4_stride;
// ================= spatial direct prediction =================
1057 if(h->direct_spatial_mv_pred){
1062 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1064 /* ref = min(neighbors) */
1065 for(list=0; list<2; list++){
1066 int refa = h->ref_cache[list][scan8[0] - 1];
1067 int refb = h->ref_cache[list][scan8[0] - 8];
1068 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069 if(refc == PART_NOT_AVAILABLE)
1070 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare makes negative (unavailable) refs rank last.
1071 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1076 if(ref[0] < 0 && ref[1] < 0){
// No neighbor refs at all: spatial direct falls back to ref 0, zero MV.
1077 ref[0] = ref[1] = 0;
1078 mv[0][0] = mv[0][1] =
1079 mv[1][0] = mv[1][1] = 0;
1081 for(list=0; list<2; list++){
1083 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1085 mv[list][0] = mv[list][1] = 0;
// Drop the unused prediction direction when one list has no valid ref.
1091 *mb_type &= ~MB_TYPE_L1;
1092 sub_mb_type &= ~MB_TYPE_L1;
1093 }else if(ref[0] < 0){
1095 *mb_type &= ~MB_TYPE_L0;
1096 sub_mb_type &= ~MB_TYPE_L0;
// Frame/field mismatch with the colocated MB: work per 8x8 block.
1099 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100 for(i8=0; i8<4; i8++){
1103 int xy8 = x8+y8*b8_stride;
1104 int xy4 = 3*x8+y8*b4_stride;
1107 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1109 h->sub_mb_type[i8] = sub_mb_type;
1111 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Colocated is "moving slowly" (|mv| <= 1, ref 0) => zero the direct MVs.
1113 if(!IS_INTRA(mb_type_col[y8])
1114 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1127 }else if(IS_16X16(*mb_type)){
1130 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132 if(!IS_INTRA(mb_type_col[0])
1133 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// Workaround for pre-34 x264 builds (0 means "not x264").
1135 && (h->x264_build>33 || !h->x264_build)))){
1137 a= pack16to32(mv[0][0],mv[0][1]);
1139 b= pack16to32(mv[1][0],mv[1][1]);
1141 a= pack16to32(mv[0][0],mv[0][1]);
1142 b= pack16to32(mv[1][0],mv[1][1]);
1144 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// General spatial case: per-8x8, optionally refined per-4x4 below.
1147 for(i8=0; i8<4; i8++){
1148 const int x8 = i8&1;
1149 const int y8 = i8>>1;
1151 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1153 h->sub_mb_type[i8] = sub_mb_type;
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1161 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1162 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163 && (h->x264_build>33 || !h->x264_build)))){
1164 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165 if(IS_SUB_8X8(sub_mb_type)){
1166 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1169 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 for(i4=0; i4<4; i4++){
1175 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1178 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1180 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ================= temporal direct prediction =================
1186 }else{ /* direct temporal mv pred */
1187 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188 const int *dist_scale_factor = h->dist_scale_factor;
1191 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
// Field MB in MBAFF: use the parity-specific maps/scale factors.
1192 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1196 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1199 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200 /* FIXME assumes direct_8x8_inference == 1 */
// Vertical MV rescale when crossing frame<->field: shift then halve.
1201 int y_shift = 2*!IS_INTERLACED(*mb_type);
1203 for(i8=0; i8<4; i8++){
1204 const int x8 = i8&1;
1205 const int y8 = i8>>1;
1207 const int16_t (*l1mv)[2]= l1mv0;
1209 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1211 h->sub_mb_type[i8] = sub_mb_type;
1213 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214 if(IS_INTRA(mb_type_col[y8])){
// Intra colocated block: direct uses ref 0 with zero motion.
1215 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 ref0 = l1ref0[x8 + y8*b8_stride];
1223 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1225 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1228 scale = dist_scale_factor[ref0];
1229 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1232 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233 int my_col = (mv_col[1]<<y_shift)/2;
// Standard temporal-direct scaling: mvL0 = (scale*mvCol + 128) >> 8.
1234 int mx = (scale * mv_col[0] + 128) >> 8;
1235 int my = (scale * my_col + 128) >> 8;
1236 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
// mvL1 = mvL0 - mvCol.
1237 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1243 /* one-to-one mv scaling */
1245 if(IS_16X16(*mb_type)){
1248 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249 if(IS_INTRA(mb_type_col[0])){
1252 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254 const int scale = dist_scale_factor[ref0];
1255 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1257 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1260 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1263 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1267 for(i8=0; i8<4; i8++){
1268 const int x8 = i8&1;
1269 const int y8 = i8>>1;
1271 const int16_t (*l1mv)[2]= l1mv0;
1273 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1275 h->sub_mb_type[i8] = sub_mb_type;
1276 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277 if(IS_INTRA(mb_type_col[0])){
1278 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1286 ref0 = map_col_to_list0[0][ref0];
1288 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1291 scale = dist_scale_factor[ref0];
1293 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294 if(IS_SUB_8X8(sub_mb_type)){
1295 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296 int mx = (scale * mv_col[0] + 128) >> 8;
1297 int my = (scale * mv_col[1] + 128) >> 8;
1298 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1301 for(i4=0; i4<4; i4++){
1302 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB caches (mv_cache / ref_cache, and mvd_cache for CABAC)
 * back into the picture-level motion_val / ref_index / mvd tables, and
 * stores the per-8x8 direct flags for B slices.
 * NOTE(review): loop headers over y and some closing braces are elided from
 * this view.
 */
1314 static inline void write_back_motion(H264Context *h, int mb_type){
1315 MpegEncContext * const s = &h->s;
// Table coordinates: 4x4-block index (b_xy) and 8x8-block index (b8_xy).
1316 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1317 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1320 if(!USES_LIST(mb_type, 0))
1321 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1323 for(list=0; list<h->list_count; list++){
1325 if(!USES_LIST(mb_type, list))
// Copy 4 MVs (2x 64-bit) per row from the cache into the picture.
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1332 if( h->pps.cabac ) {
1333 if(IS_SKIP(mb_type))
1334 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1337 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1343 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1344 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1345 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1346 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1347 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B+CABAC: record which 8x8 sub-blocks used direct mode (block 0's write
// is elided in this view; presumably handled with the non-8x8 case).
1351 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1352 if(IS_8X8(mb_type)){
1353 uint8_t *direct_table = &h->direct_table[b8_xy];
1354 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1355 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1356 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1362 * Decodes a network abstraction layer unit.
1363 * @param consumed is the number of bytes used as input
1364 * @param length is the length of the array
1365 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1366 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Parses the NAL header (ref_idc, unit_type) and removes the 00 00 03
 * emulation-prevention bytes from the RBSP.  Returns a pointer to the
 * unescaped payload: src+1 directly when no escapes exist, otherwise
 * h->rbsp_buffer[bufidx].
 * NOTE(review): several lines (variable declarations, debug #ifdefs, the
 * copy-loop head and the final return) are elided from this view.
 */
1368 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1373 // src[0]&0x80; //forbidden bit
1374 h->nal_ref_idc= src[0]>>5;
1375 h->nal_unit_type= src[0]&0x1F;
1379 for(i=0; i<length; i++)
1380 printf("%2X ", src[i]);
// Fast scan for the first 00 00 {00,01,02,03} pattern; step by 2 since an
// escape requires a zero byte at an even or odd position we will land on.
1382 for(i=0; i+1<length; i+=2){
1383 if(src[i]) continue;
1384 if(i>0 && src[i-1]==0) i--;
1385 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1387 /* startcode, so we must be past the end */
1394 if(i>=length-1){ //no escaped 0
1395 *dst_length= length;
1396 *consumed= length+1; //+1 for the header
// Partition C data gets its own buffer so it can outlive other NALs.
1400 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1401 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1402 dst= h->rbsp_buffer[bufidx];
1408 //printf("decoding esc\n");
1411 //remove escapes (very rare 1:2^22)
1412 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1413 if(src[si+2]==3){ //escape
1418 }else //next start code
1422 dst[di++]= src[si++];
// Zero the padding so downstream bit readers never read uninitialized bytes.
1425 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1428 *consumed= si + 1;//+1 for the header
1429 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1434 * identifies the exact end of the bitstream
1435 * @return the length of the rbsp trailing bits, or 0 if damaged
// Counts the rbsp trailing bits at the end of the NAL (body largely elided
// in this view; only the trace output of the last byte `v` is visible).
1437 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1441 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1451 * IDCT transforms the 16 dc values and dequantizes them.
1452 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard-style transform over the 16 luma DC coefficients
 * (stored sparsely in `block` at the x/y offsets below), followed by
 * dequantization: out = (val*qmul + 128) >> 8.
 * NOTE(review): the loop headers over i, the column-pass stores into temp[],
 * and the qp-handling (parameter currently unused here) are elided from
 * this view.
 */
1454 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1457 int temp[16]; //FIXME check if this is a good idea
// DC positions within the 16x16 coefficient layout.
1458 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1459 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1461 //memset(block, 64, 2*256);
// Vertical butterfly pass (results go to temp[], stores elided here).
1464 const int offset= y_offset[i];
1465 const int z0= block[offset+stride*0] + block[offset+stride*4];
1466 const int z1= block[offset+stride*0] - block[offset+stride*4];
1467 const int z2= block[offset+stride*1] - block[offset+stride*5];
1468 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Horizontal butterfly pass + dequant/rounding on store.
1477 const int offset= x_offset[i];
1478 const int z0= temp[4*0+i] + temp[4*2+i];
1479 const int z1= temp[4*0+i] - temp[4*2+i];
1480 const int z2= temp[4*1+i] - temp[4*3+i];
1481 const int z3= temp[4*1+i] + temp[4*3+i];
1483 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1484 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1485 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1486 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1492 * DCT transforms the 16 dc values.
1493 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard-style transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); outputs are halved (>>1).
 * NOTE(review): loop headers and the temp[] stores of the first pass are
 * elided from this view.
 */
1495 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1496 // const int qmul= dequant_coeff[qp][0];
1498 int temp[16]; //FIXME check if this is a good idea
1499 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1500 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// First butterfly pass (vertical), into temp[] (stores elided here).
1503 const int offset= y_offset[i];
1504 const int z0= block[offset+stride*0] + block[offset+stride*4];
1505 const int z1= block[offset+stride*0] - block[offset+stride*4];
1506 const int z2= block[offset+stride*1] - block[offset+stride*5];
1507 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Second butterfly pass (horizontal), halving on store.
1516 const int offset= x_offset[i];
1517 const int z0= temp[4*0+i] + temp[4*2+i];
1518 const int z1= temp[4*0+i] - temp[4*2+i];
1519 const int z2= temp[4*1+i] - temp[4*3+i];
1520 const int z3= temp[4*1+i] + temp[4*3+i];
1522 block[stride*0 +offset]= (z0 + z3)>>1;
1523 block[stride*2 +offset]= (z1 + z2)>>1;
1524 block[stride*8 +offset]= (z1 - z2)>>1;
1525 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequant of the 4 chroma DC coefficients.
 * The butterfly intermediate `e` (and `f`, presumably a-... ) is computed in
 * elided lines between the loads and the stores below.
 */
1533 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1534 const int stride= 16*2;
1535 const int xStride= 16;
1538 a= block[stride*0 + xStride*0];
1539 b= block[stride*0 + xStride*1];
1540 c= block[stride*1 + xStride*0];
1541 d= block[stride*1 + xStride*1];
// 2x2 butterfly outputs, dequantized with >>7 (chroma DC scaling).
1548 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1549 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1550 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1551 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values (encoder counterpart of
 * chroma_dc_dequant_idct_c); no scaling applied on store.
 * The intermediate `e` is computed in lines elided from this view.
 */
1555 static void chroma_dc_dct_c(DCTELEM *block){
1556 const int stride= 16*2;
1557 const int xStride= 16;
1560 a= block[stride*0 + xStride*0];
1561 b= block[stride*0 + xStride*1];
1562 c= block[stride*1 + xStride*0];
1563 d= block[stride*1 + xStride*1];
1570 block[stride*0 + xStride*0]= (a+c);
1571 block[stride*0 + xStride*1]= (e+b);
1572 block[stride*1 + xStride*0]= (a-c);
1573 block[stride*1 + xStride*1]= (e-b);
1578 * gets the chroma qp.
// Maps a luma qscale to the chroma QP for chroma plane t (0=Cb, 1=Cr)
// via the PPS-derived lookup table.
1580 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1581 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion compensation for one partition and one prediction direction:
 * quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
 * edge emulation when the MV points outside the padded picture.
 * `square`/`delta` handle non-square luma partitions (second qpix call at
 * +delta).  NOTE(review): the `emu` declaration, the MB_FIELD condition
 * guarding the chroma parity offset, and some closing braces are elided.
 */
1584 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1585 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1586 int src_x_offset, int src_y_offset,
1587 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1588 MpegEncContext * const s = &h->s;
// Full-pel + fractional split of the motion vector (quarter-pel units).
1589 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1590 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1591 const int luma_xy= (mx&3) + ((my&3)<<2);
1592 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1593 uint8_t * src_cb, * src_cr;
1594 int extra_width= h->emu_edge_width;
1595 int extra_height= h->emu_edge_height;
1597 const int full_mx= mx>>2;
1598 const int full_my= my>>2;
1599 const int pic_width = 16*s->mb_width;
1600 const int pic_height = 16*s->mb_height >> MB_FIELD;
// Sub-pel interpolation needs 2 extra pixels on each side of the block.
1602 if(mx&7) extra_width -= 3;
1603 if(my&7) extra_height -= 3;
1605 if( full_mx < 0-extra_width
1606 || full_my < 0-extra_height
1607 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1608 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1609 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1610 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1614 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1616 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1619 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1622 // chroma offset when predicting from a field of opposite parity
1623 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1624 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1626 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1627 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1630 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1631 src_cb= s->edge_emu_buffer;
1633 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1636 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637 src_cr= s->edge_emu_buffer;
1639 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted MC for one partition: `put` the first used list into the
 * destination, then switch the ops to `avg` so a second list (bi-pred)
 * averages into it.  NOTE(review): the if(list0)/if(list1) guards and the
 * qpix_op reassignment to qpix_avg are partially elided from this view.
 */
1642 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1643 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1644 int x_offset, int y_offset,
1645 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1646 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1647 int list0, int list1){
1648 MpegEncContext * const s = &h->s;
1649 qpel_mc_func *qpix_op= qpix_put;
1650 h264_chroma_mc_func chroma_op= chroma_put;
// Translate partition offsets to pixel addresses within the MB row.
1652 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1653 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1654 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1655 x_offset += 8*s->mb_x;
1656 y_offset += 8*(s->mb_y >> MB_FIELD);
1659 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1660 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1661 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1662 qpix_op, chroma_op);
// After list0, switch to averaging so list1 blends in (qpix switch elided).
1665 chroma_op= chroma_avg;
1669 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1670 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1671 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1672 qpix_op, chroma_op);
/**
 * Weighted-prediction MC for one partition.  Bi-predictive case renders
 * list1 into the obmc scratchpad and blends with luma/chroma_weight_avg
 * (implicit weights when use_weight==2, explicit otherwise); the
 * uni-predictive case applies luma/chroma_weight_op in place.
 * NOTE(review): the if(list0 && list1){...}else{...} framing lines are
 * elided from this view.
 */
1676 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1677 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1678 int x_offset, int y_offset,
1679 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1680 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1681 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1682 int list0, int list1){
1683 MpegEncContext * const s = &h->s;
1685 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1686 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1687 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1688 x_offset += 8*s->mb_x;
1689 y_offset += 8*(s->mb_y >> MB_FIELD);
1692 /* don't optimize for luma-only case, since B-frames usually
1693 * use implicit weights => chroma too. */
// Scratch layout: cb at 0, cr at +8, luma after the chroma rows.
1694 uint8_t *tmp_cb = s->obmc_scratchpad;
1695 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1696 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1697 int refn0 = h->ref_cache[0][ scan8[n] ];
1698 int refn1 = h->ref_cache[1][ scan8[n] ];
1700 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1701 dest_y, dest_cb, dest_cr,
1702 x_offset, y_offset, qpix_put, chroma_put);
1703 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1704 tmp_y, tmp_cb, tmp_cr,
1705 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weights sum to 64, derived from POC distances.
1707 if(h->use_weight == 2){
1708 int weight0 = h->implicit_weight[refn0][refn1];
1709 int weight1 = 64 - weight0;
1710 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1711 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1712 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit weighting: per-list weights and offsets from the slice header.
1714 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1715 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1716 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1717 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1718 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1719 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1720 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1721 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1722 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-prediction: render then weight in place.
1725 int list = list1 ? 1 : 0;
1726 int refn = h->ref_cache[list][ scan8[n] ];
1727 Picture *ref= &h->ref_list[list][refn];
1728 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1729 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1730 qpix_put, chroma_put);
1732 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1734 if(h->use_weight_chroma){
1735 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1737 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1738 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatch for one partition: use weighted MC when explicit weighting is on
 * (use_weight==1) or when implicit bi-pred weights differ from the plain
 * 32/32 average; otherwise the cheaper unweighted path.
 */
1743 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1744 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1745 int x_offset, int y_offset,
1746 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1747 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1748 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1749 int list0, int list1){
// implicit weight 32 (of 64) == plain average, so the std path suffices.
1750 if((h->use_weight==2 && list0 && list1
1751 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1752 || h->use_weight==1)
1753 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1754 x_offset, y_offset, qpix_put, chroma_put,
1755 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1757 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1758 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1761 static inline void prefetch_motion(H264Context *h, int list){
1762 /* fetch pixels for estimated mv 4 macroblocks ahead
1763 * optimized for 64byte cache lines */
1764 MpegEncContext * const s = &h->s;
1765 const int refn = h->ref_cache[list][scan8[0]];
// The guard on refn (skip when no valid ref) is elided from this view.
1767 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1768 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1769 uint8_t **src= h->ref_list[list][refn].data;
1770 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1771 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Chroma planes are contiguous, so one prefetch covers both via their gap.
1772 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1773 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Inter-MB motion compensation driver: dispatches each partition
 * (16x16/16x8/8x16, or per-8x8 with 8x8/8x4/4x8/4x4 sub-partitions) to
 * mc_part with the matching qpel/chroma function sizes and weight tables.
 * NOTE(review): the 8x8 loop header (over i) and the computation of n=4*i
 * are elided from this view.
 */
1777 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1778 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1779 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1780 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1781 MpegEncContext * const s = &h->s;
1782 const int mb_xy= h->mb_xy;
1783 const int mb_type= s->current_picture.mb_type[mb_xy];
1785 assert(IS_INTER(mb_type));
// Prefetch list0 ref data before doing any work; list1 at the end.
1787 prefetch_motion(h, 0);
1789 if(IS_16X16(mb_type)){
1790 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1791 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1792 &weight_op[0], &weight_avg[0],
1793 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1794 }else if(IS_16X8(mb_type)){
1795 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1796 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1797 &weight_op[1], &weight_avg[1],
1798 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1799 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1800 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1801 &weight_op[1], &weight_avg[1],
1802 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1803 }else if(IS_8X16(mb_type)){
1804 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1806 &weight_op[2], &weight_avg[2],
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1809 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1810 &weight_op[2], &weight_avg[2],
1811 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1815 assert(IS_8X8(mb_type));
// Per-8x8 sub-partition dispatch (loop over the four 8x8 blocks; the
// loop header and n/x_offset derivation context are partially elided).
1818 const int sub_mb_type= h->sub_mb_type[i];
1820 int x_offset= (i&1)<<2;
1821 int y_offset= (i&2)<<1;
1823 if(IS_SUB_8X8(sub_mb_type)){
1824 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1825 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826 &weight_op[3], &weight_avg[3],
1827 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1828 }else if(IS_SUB_8X4(sub_mb_type)){
1829 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1830 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1831 &weight_op[4], &weight_avg[4],
1832 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1833 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1834 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1835 &weight_op[4], &weight_avg[4],
1836 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837 }else if(IS_SUB_4X8(sub_mb_type)){
1838 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1840 &weight_op[5], &weight_avg[5],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1843 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1844 &weight_op[5], &weight_avg[5],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1848 assert(IS_SUB_4X4(sub_mb_type));
1850 int sub_x_offset= x_offset + 2*(j&1);
1851 int sub_y_offset= y_offset + (j&2);
1852 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1853 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1854 &weight_op[6], &weight_avg[6],
1855 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1861 prefetch_motion(h, 1);
/**
 * One-time initialization of all CAVLC tables into the static VLC_TYPE
 * storage declared at the top of the file (INIT_VLC_USE_NEW_STATIC avoids
 * heap allocation).  NOTE(review): the `if(!done)` guard, the `i`/`offset`
 * declarations, the coeff_token loop header and the run_vlc loop header
 * are elided from this view.
 */
1864 static av_cold void decode_init_vlc(void){
1865 static int done = 0;
1872 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1873 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1874 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1875 &chroma_dc_coeff_token_len [0], 1, 1,
1876 &chroma_dc_coeff_token_bits[0], 1, 1,
1877 INIT_VLC_USE_NEW_STATIC);
// The four coeff_token tables are packed back-to-back; offset walks them.
1881 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1882 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1883 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1884 &coeff_token_len [i][0], 1, 1,
1885 &coeff_token_bits[i][0], 1, 1,
1886 INIT_VLC_USE_NEW_STATIC);
1887 offset += coeff_token_vlc_tables_size[i];
1890 * This is a one time safety check to make sure that
1891 * the packed static coeff_token_vlc table sizes
1892 * were initialized correctly.
1894 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1897 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1898 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1899 init_vlc(&chroma_dc_total_zeros_vlc[i],
1900 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1901 &chroma_dc_total_zeros_len [i][0], 1, 1,
1902 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1903 INIT_VLC_USE_NEW_STATIC);
1905 for(i=0; i<15; i++){
1906 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1907 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1908 init_vlc(&total_zeros_vlc[i],
1909 TOTAL_ZEROS_VLC_BITS, 16,
1910 &total_zeros_len [i][0], 1, 1,
1911 &total_zeros_bits[i][0], 1, 1,
1912 INIT_VLC_USE_NEW_STATIC);
1916 run_vlc[i].table = run_vlc_tables[i];
1917 run_vlc[i].table_allocated = run_vlc_tables_size;
1918 init_vlc(&run_vlc[i],
1920 &run_len [i][0], 1, 1,
1921 &run_bits[i][0], 1, 1,
1922 INIT_VLC_USE_NEW_STATIC);
// run7 handles runs >= 7 and uses the last (index 6) run code tables.
1924 run7_vlc.table = run7_vlc_table,
1925 run7_vlc.table_allocated = run7_vlc_table_size;
1926 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1927 &run_len [6][0], 1, 1,
1928 &run_bits[6][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
/* Frees every per-context table allocated by alloc_tables()/context_init().
 * av_freep() NULLs the pointers, so a double call is safe; slice_table is a
 * derived pointer into slice_table_base and is cleared by hand. */
1933 static void free_tables(H264Context *h){
1936 av_freep(&h->intra4x4_pred_mode);
1937 av_freep(&h->chroma_pred_mode_table);
1938 av_freep(&h->cbp_table);
1939 av_freep(&h->mvd_table[0]);
1940 av_freep(&h->mvd_table[1]);
1941 av_freep(&h->direct_table);
1942 av_freep(&h->non_zero_count);
1943 av_freep(&h->slice_table_base);
1944 h->slice_table= NULL;
1946 av_freep(&h->mb2b_xy);
1947 av_freep(&h->mb2b8_xy);
/* per-thread buffers: each slice-thread context owns its own borders and
 * motion-compensation scratchpad (see context_init()/frame_start()). */
1949 for(i = 0; i < h->s.avctx->thread_count; i++) {
1950 hx = h->thread_context[i];
1952 av_freep(&hx->top_borders[1]);
1953 av_freep(&hx->top_borders[0]);
1954 av_freep(&hx->s.obmc_scratchpad);
/* Builds the 8x8 dequantization tables (one 64-entry table per QP 0..51)
 * from the PPS scaling matrices.  If both 8x8 scaling matrices are equal the
 * second table aliases the first buffer instead of being recomputed.
 * `transpose` is detected by comparing against the C idct pointer (FIXME
 * below) — a SIMD idct expects transposed coefficient order. */
1958 static void init_dequant8_coeff_table(H264Context *h){
1960 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1961 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1962 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1964 for(i=0; i<2; i++ ){
1965 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1966 h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* value = base_coeff(q%6, position) * scaling_matrix << (q/6);
 * the transpose expression swaps row/column of the 8x8 index. */
1970 for(q=0; q<52; q++){
1971 int shift = div6[q];
1974 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1975 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1976 h->pps.scaling_matrix8[i][x]) << shift;
/* Builds the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb, Cr),
 * sharing a buffer with any earlier matrix j whose PPS scaling list is
 * byte-identical.  Same transpose trick as the 8x8 variant. */
1981 static void init_dequant4_coeff_table(H264Context *h){
1983 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1984 for(i=0; i<6; i++ ){
1985 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse: if matrix i equals an already-built matrix j, alias its buffer
 * (the loop over j and the skip of the rebuild are elided in this view). */
1987 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1988 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1995 for(q=0; q<52; q++){
1996 int shift = div6[q] + 2;
1999 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2000 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2001 h->pps.scaling_matrix4[i][x]) << shift;
/* Entry point for dequant table setup: always builds the 4x4 tables, builds
 * 8x8 only when the PPS enables 8x8 transforms, and for lossless
 * (transform_bypass) streams forces QP0 entries to the neutral value 1<<6
 * so dequantization becomes a no-op at qscale 0. */
2006 static void init_dequant_tables(H264Context *h){
2008 init_dequant4_coeff_table(h);
2009 if(h->pps.transform_8x8_mode)
2010 init_dequant8_coeff_table(h);
2011 if(h->sps.transform_bypass){
2014 h->dequant4_coeff[i][0][x] = 1<<6;
2015 if(h->pps.transform_8x8_mode)
2018 h->dequant8_coeff[i][0][x] = 1<<6;
* needs width/height
/* Allocates all per-picture-geometry tables.  CHECKED_ALLOCZ presumably
 * zero-allocates and jumps to an (elided) `fail:` label on OOM — confirm
 * against the macro definition.  Tables are sized with one extra mb row
 * (mb_height+1) so the caches above row 0 have valid backing storage. */
2027 static int alloc_tables(H264Context *h){
2028 MpegEncContext * const s = &h->s;
2029 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2032 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2034 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2035 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2036 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2038 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2039 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2040 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2041 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table points past a 2-row+1 guard band so
 * neighbours above/left of the first MB read the sentinel, not garbage. */
2043 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2044 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* lookup tables: macroblock index -> 4x4 (b) and 8x8 (b8) block indices. */
2046 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2047 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2048 for(y=0; y<s->mb_height; y++){
2049 for(x=0; x<s->mb_width; x++){
2050 const int mb_xy= x + y*s->mb_stride;
2051 const int b_xy = 4*x + 4*y*h->b_stride;
2052 const int b8_xy= 2*x + 2*y*h->b8_stride;
2054 h->mb2b_xy [mb_xy]= b_xy;
2055 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated lazily in frame_start() once linesize is known. */
2059 s->obmc_scratchpad = NULL;
2061 if(!h->dequant4_coeff[0])
2062 init_dequant_tables(h);
* Mimic alloc_tables(), but for every context thread.
/* Shares (does not copy) the big per-picture tables of `src` with a
 * slice-thread context `dst`; only the prediction-function table and the
 * lazily-allocated scratchpad are per-thread.  Freeing these shared
 * pointers is the job of the owning context only (see free_tables()). */
2073 static void clone_tables(H264Context *dst, H264Context *src){
2074 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2075 dst->non_zero_count = src->non_zero_count;
2076 dst->slice_table = src->slice_table;
2077 dst->cbp_table = src->cbp_table;
2078 dst->mb2b_xy = src->mb2b_xy;
2079 dst->mb2b8_xy = src->mb2b8_xy;
2080 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2081 dst->mvd_table[0] = src->mvd_table[0];
2082 dst->mvd_table[1] = src->mvd_table[1];
2083 dst->direct_table = src->direct_table;
2085 dst->s.obmc_scratchpad = NULL;
2086 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
* Allocate buffers which are not shared amongst multiple threads.
/* Per-thread deblocking border buffers: 16 luma + 8+8 chroma bytes per MB
 * column, two rows for MBAFF pairs.  Returns 0 on success; on allocation
 * failure falls through to the -1 return below (cleanup left to
 * free_tables(), per the comment). */
2093 static int context_init(H264Context *h){
2094 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2095 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2099 return -1; // free_tables will clean up for us
/* Shared init for decoder (and historically encoder) contexts: copies
 * geometry/codec id from avctx, sets up intra prediction, and seeds the
 * scaling matrices with the spec's flat default (all 16) so dequant works
 * before any PPS with custom lists arrives. */
2102 static av_cold void common_init(H264Context *h){
2103 MpegEncContext * const s = &h->s;
2105 s->width = s->avctx->width;
2106 s->height = s->avctx->height;
2107 s->codec_id= s->avctx->codec->id;
2109 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 == "no PPS applied yet"; forces dequant table rebuild on first slice. */
2111 h->dequant_coeff_pps= -1;
2112 s->unrestricted_mv=1;
2113 s->decode=1; //FIXME
2115 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2117 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2118 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback.  Sets MpegEncContext defaults, picks the pixel
 * format (SVQ3 uses full-range YUVJ420P), and detects AVC-style ("avcC",
 * extradata starting with byte 1) vs Annex-B bitstreams.  The elided branch
 * after the extradata test presumably sets h->is_avc / nal length size —
 * confirm against the full source. */
2121 static av_cold int decode_init(AVCodecContext *avctx){
2122 H264Context *h= avctx->priv_data;
2123 MpegEncContext * const s = &h->s;
2125 MPV_decode_defaults(s);
2130 s->out_format = FMT_H264;
2131 s->workaround_bugs= avctx->workaround_bugs;
2134 // s->decode_mb= ff_h263_decode_mb;
2135 s->quarter_sample = 1;
2138 if(avctx->codec_id == CODEC_ID_SVQ3)
2139 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2141 avctx->pix_fmt= PIX_FMT_YUV420P;
2145 if(avctx->extradata_size > 0 && avctx->extradata &&
2146 *(char *)avctx->extradata == 1){
/* thread_context[0] is the main context itself; poc state is reset to
 * "nothing output yet". */
2153 h->thread_context[0] = h;
2154 h->outputed_poc = INT_MIN;
2155 h->prev_poc_msb= 1<<16;
/* Per-frame setup: starts the MPV frame/error-resilience machinery,
 * precomputes the 48 block_offset entries (frame-order [0..23] and
 * field-order [24..47] variants), lazily allocates the per-thread MC
 * scratchpad (needs linesize, hence not in alloc_tables), and resets
 * reference/poc bookkeeping on the new picture. */
2159 static int frame_start(H264Context *h){
2160 MpegEncContext * const s = &h->s;
2163 if(MPV_frame_start(s, s->avctx) < 0)
2165 ff_er_frame_start(s);
2167 * MPV_frame_start uses pict_type to derive key_frame.
2168 * This is incorrect for H.264; IDR markings must be used.
2169 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2170 * See decode_nal_units().
2172 s->current_picture_ptr->key_frame= 0;
2174 assert(s->linesize && s->uvlinesize);
/* scan8 maps block index -> position in the 8x8 cache grid; offsets [24+i]
 * use doubled line distance for field (MBAFF) macroblocks. */
2176 for(i=0; i<16; i++){
2177 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2178 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2181 h->block_offset[16+i]=
2182 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2183 h->block_offset[24+16+i]=
2184 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2187 /* can't be in alloc_tables because linesize isn't known there.
2188 * FIXME: redo bipred weight to not require extra buffer? */
2189 for(i = 0; i < s->avctx->thread_count; i++)
2190 if(!h->thread_context[i]->s.obmc_scratchpad)
2191 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize)
2193 /* some macroblocks will be accessed before they're available */
2194 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2195 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2197 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2199 // We mark the current picture as non-reference after allocating it, so
2200 // that if we break out due to an error it can be released automatically
2201 // in the next MPV_frame_start().
2202 // SVQ3 as well as most other codecs have only last/next/current and thus
2203 // get released even with set reference, besides SVQ3 and others do not
2204 // mark frames as reference later "naturally".
2205 if(s->codec_id != CODEC_ID_SVQ3)
2206 s->current_picture_ptr->reference= 0;
2208 s->current_picture_ptr->field_poc[0]=
2209 s->current_picture_ptr->field_poc[1]= INT_MAX;
2210 assert(s->current_picture_ptr->long_ref==0);
/* Saves the bottom edge (into top_borders for the MB row below) and right
 * edge (into left_border for the MB to the right) of the just-decoded
 * macroblock, before the deblocking filter overwrites those samples.
 * Layout assumption (from the indices used here): top_borders rows are
 * [0..15]=luma, [16..23]=Cb, [24..31]=Cr; left_border is luma at [offset],
 * chroma at [uvoffset+34] / [uvoffset+34+18].  MBAFF uses two saved lines
 * per MB pair (offset 1 vs 17) — the elided branches select between them. */
2215 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2216 MpegEncContext * const s = &h->s;
2225 src_cb -= uvlinesize;
2226 src_cr -= uvlinesize;
2228 if(!simple && FRAME_MBAFF){
2230 offset = MB_MBAFF ? 1 : 17;
2231 uvoffset= MB_MBAFF ? 1 : 9;
2233 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2234 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
/* chroma planes are skipped entirely in gray-only decoding mode. */
2235 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2236 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2237 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2242 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2243 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2244 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2245 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2251 top_idx = MB_MBAFF ? 0 : 1;
2253 step= MB_MBAFF ? 2 : 1;
2256 // There are two lines saved, the line above the the top macroblock of a pair,
2257 // and the line above the bottom macroblock
2258 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2259 for(i=1; i<17 - skiplast; i++){
2260 h->left_border[offset+i*step]= src_y[15+i* linesize];
2263 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2264 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2266 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2267 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2268 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2269 for(i=1; i<9 - skiplast; i++){
2270 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2271 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2273 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2274 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swaps (xchg!=0) or copies the saved border samples back in around the
 * current macroblock so intra prediction sees unfiltered neighbours while
 * the picture keeps the deblocked ones.  Called in pairs (xchg=1 before
 * prediction, xchg=0 after).  deblock_left/top gate which edges actually
 * need the exchange: with deblocking_filter==2 only same-slice neighbours
 * are deblocked, otherwise simple picture-edge tests suffice. */
2278 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2279 MpegEncContext * const s = &h->s;
2290 if(!simple && FRAME_MBAFF){
2292 offset = MB_MBAFF ? 1 : 17;
2293 uvoffset= MB_MBAFF ? 1 : 9;
2297 top_idx = MB_MBAFF ? 0 : 1;
2299 step= MB_MBAFF ? 2 : 1;
2302 if(h->deblocking_filter == 2) {
2304 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2305 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2307 deblock_left = (s->mb_x > 0);
2308 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one line/column to address the border row and left column. */
2311 src_y -= linesize + 1;
2312 src_cb -= uvlinesize + 1;
2313 src_cr -= uvlinesize + 1;
/* XCHG(a,b,t,xchg): swap a and b via temp t when xchg, else copy b into a
 * (full expansion elided in this view — confirm against the macro body). */
2315 #define XCHG(a,b,t,xchg)\
2322 for(i = !deblock_top; i<16; i++){
2323 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2325 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2329 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2330 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbour of the next MB, needed for diagonal prediction. */
2331 if(s->mb_x+1 < s->mb_width){
2332 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2336 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2338 for(i = !deblock_top; i<8; i++){
2339 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2340 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2342 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2343 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2346 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2347 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Core macroblock reconstruction (the "high-level decode" stage): intra
 * prediction or inter motion compensation, followed by IDCT/residual add
 * for luma and chroma, then deblocking-filter bookkeeping.
 * Always-inlined with compile-time `simple`: simple==1 produces the fast
 * path specialisation (no MBAFF/field/gray/PCM/SVQ3 handling), simple==0
 * the fully general one — see hl_decode_mb_simple()/_complex(). */
2352 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2353 MpegEncContext * const s = &h->s;
2354 const int mb_x= s->mb_x;
2355 const int mb_y= s->mb_y;
2356 const int mb_xy= h->mb_xy;
2357 const int mb_type= s->current_picture.mb_type[mb_xy];
2358 uint8_t *dest_y, *dest_cb, *dest_cr;
2359 int linesize, uvlinesize /*dct_offset*/;
2361 int *block_offset = &h->block_offset[0];
/* lossless mode is active only at qscale 0 with the SPS bypass flag. */
2362 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2363 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2364 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2365 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2367 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2368 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2369 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2371 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2372 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblocks: double the stride, use the field block_offset set
 * ([24..47]), and for bottom-field MBs rewind to the field's first line. */
2374 if (!simple && MB_FIELD) {
2375 linesize = h->mb_linesize = s->linesize * 2;
2376 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2377 block_offset = &h->block_offset[24];
2378 if(mb_y&1){ //FIXME move out of this function?
2379 dest_y -= s->linesize*15;
2380 dest_cb-= s->uvlinesize*7;
2381 dest_cr-= s->uvlinesize*7;
/* retag ref_cache entries with the field parity bit for MBAFF deblocking. */
2385 for(list=0; list<h->list_count; list++){
2386 if(!USES_LIST(mb_type, list))
2388 if(IS_16X16(mb_type)){
2389 int8_t *ref = &h->ref_cache[list][scan8[0]];
2390 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2392 for(i=0; i<16; i+=4){
2393 int ref = h->ref_cache[list][scan8[i]];
2395 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2401 linesize = h->mb_linesize = s->linesize;
2402 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2403 // dct_offset = s->linesize * 16;
/* I_PCM: raw samples were stored in h->mb by the entropy decoder; copy
 * them straight to the picture. */
2406 if (!simple && IS_INTRA_PCM(mb_type)) {
2407 for (i=0; i<16; i++) {
2408 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2410 for (i=0; i<8; i++) {
2411 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2412 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
/* ---- intra prediction path ---- */
2415 if(IS_INTRA(mb_type)){
2416 if(h->deblocking_filter)
2417 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2419 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2420 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2421 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2424 if(IS_INTRA4x4(mb_type)){
2425 if(simple || !s->encoding){
2426 if(IS_8x8DCT(mb_type)){
2427 if(transform_bypass){
2429 idct_add = s->dsp.add_pixels8;
2431 idct_dc_add = s->dsp.h264_idct8_dc_add;
2432 idct_add = s->dsp.h264_idct8_add;
2434 for(i=0; i<16; i+=4){
2435 uint8_t * const ptr= dest_y + block_offset[i];
2436 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 = High 4:4:4; lossless V/H prediction is fused with the
 * residual add via the pred*_add function tables. */
2437 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2438 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2440 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2441 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2442 (h->topright_samples_available<<i)&0x4000, linesize);
/* single-DC-only blocks use the cheaper dc_add kernel. */
2444 if(nnz == 1 && h->mb[i*16])
2445 idct_dc_add(ptr, h->mb + i*16, linesize);
2447 idct_add (ptr, h->mb + i*16, linesize);
2452 if(transform_bypass){
2454 idct_add = s->dsp.add_pixels4;
2456 idct_dc_add = s->dsp.h264_idct_dc_add;
2457 idct_add = s->dsp.h264_idct_add;
2459 for(i=0; i<16; i++){
2460 uint8_t * const ptr= dest_y + block_offset[i];
2461 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2463 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2464 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* when the top-right neighbour is unavailable, replicate the last
 * available top sample across all four positions (spec behaviour). */
2468 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2469 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2470 assert(mb_y || linesize <= block_offset[i]);
2471 if(!topright_avail){
2472 tr= ptr[3 - linesize]*0x01010101;
2473 topright= (uint8_t*) &tr;
2475 topright= ptr + 4 - linesize;
2479 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2480 nnz = h->non_zero_count_cache[ scan8[i] ];
2483 if(nnz == 1 && h->mb[i*16])
2484 idct_dc_add(ptr, h->mb + i*16, linesize);
2486 idct_add (ptr, h->mb + i*16, linesize);
2488 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra16x16: whole-MB prediction plus separately-transformed DC plane. */
2495 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2497 if(!transform_bypass)
2498 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2500 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2502 if(h->deblocking_filter)
2503 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* ---- inter path: motion compensation ---- */
2505 hl_motion(h, dest_y, dest_cb, dest_cr,
2506 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2507 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2508 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* ---- luma residual add (everything except intra4x4, done above) ---- */
2512 if(!IS_INTRA4x4(mb_type)){
2514 if(IS_INTRA16x16(mb_type)){
2515 if(transform_bypass){
2516 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2517 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2519 for(i=0; i<16; i++){
2520 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2521 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2525 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2527 }else if(h->cbp&15){
2528 if(transform_bypass){
2529 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2530 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2531 for(i=0; i<16; i+=di){
2532 if(h->non_zero_count_cache[ scan8[i] ]){
2533 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2537 if(IS_8x8DCT(mb_type)){
2538 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2540 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2545 for(i=0; i<16; i++){
2546 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2547 uint8_t * const ptr= dest_y + block_offset[i];
2548 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* ---- chroma residual add; cbp bits 4-5 flag chroma coefficients ---- */
2554 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2555 uint8_t *dest[2] = {dest_cb, dest_cr};
2556 if(transform_bypass){
2557 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2558 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2559 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2561 idct_add = s->dsp.add_pixels4;
2562 for(i=16; i<16+8; i++){
2563 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2564 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses its own 2x2 transform with per-plane QP/dequant row. */
2568 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2569 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2571 idct_add = s->dsp.h264_idct_add;
2572 idct_dc_add = s->dsp.h264_idct_dc_add;
2573 for(i=16; i<16+8; i++){
2574 if(h->non_zero_count_cache[ scan8[i] ])
2575 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2576 else if(h->mb[i*16])
2577 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2580 for(i=16; i<16+8; i++){
2581 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2582 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2583 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2590 if(h->cbp || IS_INTRA(mb_type))
2591 s->dsp.clear_blocks(h->mb);
/* ---- deblocking: save borders, refresh caches/QP, run the filter ---- */
2593 if(h->deblocking_filter) {
2594 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2595 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2596 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2597 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2598 if (!simple && FRAME_MBAFF) {
2599 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2601 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
* Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path specialisation: forces simple=1 so the always-inline internal
 * routine compiles out MBAFF/PCM/gray/SVQ3 handling. */
2609 static void hl_decode_mb_simple(H264Context *h){
2610 hl_decode_mb_internal(h, 1);
* Process a macroblock; this handles edge cases, such as interlacing.
/* General-path specialisation (simple=0); av_noinline keeps this rare path
 * from bloating the caller. */
2616 static void av_noinline hl_decode_mb_complex(H264Context *h){
2617 hl_decode_mb_internal(h, 0);
/* Dispatches macroblock reconstruction to the simple or complex
 * specialisation.  Complex is required for small builds (ENABLE_SMALL keeps
 * only one instantiation), interlaced/otherwise-flagged streams
 * (h->is_complex), I_PCM blocks, and qscale 0 (possible lossless). */
2620 static void hl_decode_mb(H264Context *h){
2621 MpegEncContext * const s = &h->s;
2622 const int mb_xy= h->mb_xy;
2623 const int mb_type= s->current_picture.mb_type[mb_xy];
2624 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* encoder builds may skip reconstruction entirely. */
2626 if(ENABLE_H264_ENCODER && !s->decode)
2630 hl_decode_mb_complex(h);
2631 else hl_decode_mb_simple(h);
/* Converts a frame Picture (in place) into a single-field view: doubles the
 * strides, and for the bottom field advances each plane by one line.  poc is
 * taken from the matching field_poc entry.
 * NOTE(review): pic->reference is (redundantly) re-assigned on every loop
 * iteration — harmless, the final value is just `parity`. */
2634 static void pic_as_field(Picture *pic, const int parity){
2636 for (i = 0; i < 4; ++i) {
2637 if (parity == PICT_BOTTOM_FIELD)
2638 pic->data[i] += pic->linesize[i];
2639 pic->reference = parity;
2640 pic->linesize[i] *= 2;
2642 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copies `src` into `dest` if src is referenced with the requested parity
 * (top/bottom/frame); field parities are converted via pic_as_field() and
 * pic_id is biased by id_add.  Returns the match flag (elided copy and
 * return statements sit between the visible lines). */
2645 static int split_field_copy(Picture *dest, Picture *src,
2646 int parity, int id_add){
2647 int match = !!(src->reference & parity);
2651 if(parity != PICT_FRAME){
2652 pic_as_field(dest, parity);
2654 dest->pic_id += id_add;
/* Builds part of a default reference list from `in` (NULL entries allowed),
 * alternating between pictures referenced with field parity `sel` and the
 * opposite parity (sel^3), as the spec requires for field decoding.  Two
 * cursors i[0]/i[1] advance independently past non-matching entries.
 * pic_id becomes the long-term index (is_long) or frame_num.  Returns the
 * number of entries written (return elided in this view). */
2661 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2665 while(i[0]<len || i[1]<len){
2666 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2668 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2671 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2672 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2675 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2676 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sorts pictures from `src` into `sorted` by poc, strictly on one
 * side of `limit`: dir==0 picks poc<limit descending (past refs for B list
 * 0), dir==1 picks poc>limit ascending (future refs).  Stops when no
 * candidate remains; returns the count (out_i, return elided). */
2683 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2688 best_poc= dir ? INT_MIN : INT_MAX;
2690 for(i=0; i<len; i++){
2691 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the ascending direction. */
2692 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2694 sorted[out_i]= src[i];
2697 if(best_poc == (dir ? INT_MIN : INT_MAX))
2699 limit= sorted[out_i++]->poc - dir;
* fills the default_ref_list.
/* Constructs the spec's initial (pre-reordering) reference lists.
 * B slices: short-term refs sorted by poc relative to the current picture
 * (past-first for list0, future-first for list1), then long-term refs; if
 * the two lists come out identical their first two entries are swapped
 * (8.2.4.2.3).  P slices: short-term in decoding order, then long-term. */
2707 static int fill_default_ref_list(H264Context *h){
2708 MpegEncContext * const s = &h->s;
2711 if(h->slice_type_nos==FF_B_TYPE){
2712 Picture *sorted[32];
2717 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2719 cur_poc= s->current_picture_ptr->poc;
2721 for(list= 0; list<2; list++){
2722 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2723 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2725 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2726 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2729 if(len < h->ref_count[list])
2730 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2734 if(lens[0] == lens[1] && lens[1] > 1){
2735 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2737 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2740 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2741 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2743 if(len < h->ref_count[0])
2744 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* trace output only; compiled out unless TRACE is enabled. */
2747 for (i=0; i<h->ref_count[0]; i++) {
2748 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2750 if(h->slice_type_nos==FF_B_TYPE){
2751 for (i=0; i<h->ref_count[1]; i++) {
2752 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2759 static void print_short_term(H264Context *h);
2760 static void print_long_term(H264Context *h);
* Extract structure information about the picture described by pic_num in
* the current decoding context (frame or field). Note that pic_num is
* picture number without wrapping (so, 0<=pic_num<max_pic_num).
* @param pic_num picture number for which to extract structure information
* @param structure one of PICT_XXX describing structure of picture
*                  with pic_num
* @return frame number (short term) or long term index of picture
*         described by pic_num
/* In field decoding, even/odd pic_num selects same/opposite parity; the
 * branch condition and the pic_num>>1 return are elided in this view. */
2772 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2773 MpegEncContext * const s = &h->s;
2775 *structure = s->picture_structure;
2778 /* opposite field */
2779 *structure ^= PICT_FRAME;
/* Parses ref_pic_list_reordering() (spec 7.3.3.1 / 8.2.4.3): starts from
 * the default lists, then for each reordering command locates the named
 * short-term (idc 0/1, signed frame_num delta) or long-term (idc 2) picture
 * and rotates it to position `index` of the list.  idc 3 terminates.
 * Finally, holes left by missing references are papered over with the
 * current picture (flagged FIXME).  Returns 0 on success, -1 on bitstream
 * errors (error returns elided between the av_log calls). */
2786 static int decode_ref_pic_list_reordering(H264Context *h){
2787 MpegEncContext * const s = &h->s;
2788 int list, index, pic_structure;
2790 print_short_term(h);
2793 for(list=0; list<h->list_count; list++){
2794 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2796 if(get_bits1(&s->gb)){
2797 int pred= h->curr_pic_num;
2799 for(index=0; ; index++){
2800 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2801 unsigned int pic_id;
2803 Picture *ref = NULL;
2805 if(reordering_of_pic_nums_idc==3)
2808 if(index >= h->ref_count[list]){
2809 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2813 if(reordering_of_pic_nums_idc<3){
2814 if(reordering_of_pic_nums_idc<2){
/* short-term: pred accumulates the signed delta, modulo max_pic_num. */
2815 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2818 if(abs_diff_pic_num > h->max_pic_num){
2819 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2823 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2824 else pred+= abs_diff_pic_num;
2825 pred &= h->max_pic_num - 1;
2827 frame_num = pic_num_extract(h, pred, &pic_structure);
2829 for(i= h->short_ref_count-1; i>=0; i--){
2830 ref = h->short_ref[i];
2831 assert(ref->reference);
2832 assert(!ref->long_ref);
2834 ref->frame_num == frame_num &&
2835 (ref->reference & pic_structure)
/* long-term: pic_id is the long_term_pic_idx. */
2843 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2845 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2848 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2851 ref = h->long_ref[long_idx];
2852 assert(!(ref && !ref->reference));
2853 if(ref && (ref->reference & pic_structure)){
2854 ref->pic_id= pic_id;
2855 assert(ref->long_ref);
2863 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2864 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift list entries down to `index` and insert the found reference. */
2866 for(i=index; i+1<h->ref_count[list]; i++){
2867 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2870 for(; i > index; i--){
2871 h->ref_list[list][i]= h->ref_list[list][i-1];
2873 h->ref_list[list][index]= *ref;
2875 pic_as_field(&h->ref_list[list][index], pic_structure);
2879 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2885 for(list=0; list<h->list_count; list++){
2886 for(index= 0; index < h->ref_count[list]; index++){
2887 if(!h->ref_list[list][index].data[0]){
2888 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2889 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF: derives per-field reference entries (slots [16+2*i] top,
 * [16+2*i+1] bottom) from each frame reference — doubled strides, bottom
 * field shifted by one line, field pocs — and duplicates the explicit and
 * implicit weight/offset tables into those field slots. */
2897 static void fill_mbaff_ref_list(H264Context *h){
2899 for(list=0; list<2; list++){ //FIXME try list_count
2900 for(i=0; i<h->ref_count[list]; i++){
2901 Picture *frame = &h->ref_list[list][i];
2902 Picture *field = &h->ref_list[list][16+2*i];
2905 field[0].linesize[j] <<= 1;
2906 field[0].reference = PICT_TOP_FIELD;
2907 field[0].poc= field[0].field_poc[0];
2908 field[1] = field[0];
2910 field[1].data[j] += frame->linesize[j];
2911 field[1].reference = PICT_BOTTOM_FIELD;
2912 field[1].poc= field[1].field_poc[1];
2914 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2915 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2917 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2918 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit (B-slice) weights are indexed [ref1][ref0]; duplicate rows too. */
2922 for(j=0; j<h->ref_count[1]; j++){
2923 for(i=0; i<h->ref_count[0]; i++)
2924 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2925 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2926 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parses pred_weight_table() (explicit weighted prediction, spec 7.3.3.2):
 * per-list, per-reference luma and chroma weight/offset pairs, defaulting
 * to 1<<log2_denom weight and 0 offset when the per-ref flag is absent.
 * use_weight/use_weight_chroma are set only if any value is non-default.
 * Returns 0 (return elided in this view). */
2930 static int pred_weight_table(H264Context *h){
2931 MpegEncContext * const s = &h->s;
2933 int luma_def, chroma_def;
2936 h->use_weight_chroma= 0;
2937 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2938 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2939 luma_def = 1<<h->luma_log2_weight_denom;
2940 chroma_def = 1<<h->chroma_log2_weight_denom;
2942 for(list=0; list<2; list++){
2943 for(i=0; i<h->ref_count[list]; i++){
2944 int luma_weight_flag, chroma_weight_flag;
2946 luma_weight_flag= get_bits1(&s->gb);
2947 if(luma_weight_flag){
2948 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2949 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2950 if( h->luma_weight[list][i] != luma_def
2951 || h->luma_offset[list][i] != 0)
2954 h->luma_weight[list][i]= luma_def;
2955 h->luma_offset[list][i]= 0;
2959 chroma_weight_flag= get_bits1(&s->gb);
2960 if(chroma_weight_flag){
2963 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2964 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2965 if( h->chroma_weight[list][i][j] != chroma_def
2966 || h->chroma_offset[list][i][j] != 0)
2967 h->use_weight_chroma= 1;
2972 h->chroma_weight[list][i][j]= chroma_def;
2973 h->chroma_offset[list][i][j]= 0;
/* P slices have only list 0; stop after the first iteration. */
2978 if(h->slice_type_nos != FF_B_TYPE) break;
2980 h->use_weight= h->use_weight || h->use_weight_chroma;
/* Derives implicit B-slice bi-prediction weights from picture order counts
 * (spec 8.4.2.3.2).  Fast path: a single ref per list equidistant from the
 * current poc needs no weighting.  Otherwise weight = 64 - dist_scale_factor
 * per (ref0, ref1) pair, clamped to the neutral 32 when the scale factor
 * leaves [-64, 128] or td==0 (that branch is elided here). */
2984 static void implicit_weight_table(H264Context *h){
2985 MpegEncContext * const s = &h->s;
2987 int cur_poc = s->current_picture_ptr->poc;
2989 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2990 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2992 h->use_weight_chroma= 0;
/* value 2 == "implicit" mode for the motion-compensation weight code. */
2997 h->use_weight_chroma= 2;
2998 h->luma_log2_weight_denom= 5;
2999 h->chroma_log2_weight_denom= 5;
3001 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3002 int poc0 = h->ref_list[0][ref0].poc;
3003 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3004 int poc1 = h->ref_list[1][ref1].poc;
3005 int td = av_clip(poc1 - poc0, -128, 127);
3007 int tb = av_clip(cur_poc - poc0, -128, 127);
3008 int tx = (16384 + (FFABS(td) >> 1)) / td;
3009 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3010 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3011 h->implicit_weight[ref0][ref1] = 32;
3013 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3015 h->implicit_weight[ref0][ref1] = 32;
3021 * Mark a picture as no longer needed for reference. The refmask
3022 * argument allows unreferencing of individual fields or the whole frame.
3023 * If the picture becomes entirely unreferenced, but is being held for
3024 * display purposes, it is marked as such.
3025 * @param refmask mask of fields to unreference; the mask is bitwise
3026 * anded with the reference marking of pic
3027 * @return non-zero if pic becomes entirely unreferenced (except possibly
3028 * for display purposes) zero if one of the fields remains in
3031 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Clears the reference bits of pic selected by refmask (see the doc
 * comment above this function). If the picture is still queued for
 * output in delayed_pic[], keep it alive as DELAYED_PIC_REF.
 * NOTE(review): gapped extract — return paths not visible here. */
3033 if (pic->reference &= refmask) {
3036 for(i = 0; h->delayed_pic[i]; i++)
3037 if(pic == h->delayed_pic[i]){
3038 pic->reference=DELAYED_PIC_REF;
3046 * instantaneous decoder refresh.
3048 static void idr(H264Context *h){
/* IDR handling: drop every long-term and short-term reference and
 * reset the frame-number tracking state, as an instantaneous decoder
 * refresh requires. */
3051 for(i=0; i<16; i++){
3052 remove_long(h, i, 0);
3054 assert(h->long_ref_count==0);
3056 for(i=0; i<h->short_ref_count; i++){
/* refmask 0 ⇒ fully unreference (kept only if pending display). */
3057 unreference_pic(h, h->short_ref[i], 0);
3058 h->short_ref[i]= NULL;
3060 h->short_ref_count=0;
3061 h->prev_frame_num= 0;
3062 h->prev_frame_num_offset= 0;
3067 /* forget old pics after a seek */
3068 static void flush_dpb(AVCodecContext *avctx){
/* AVCodec.flush callback: forget all delayed/reference pictures after a
 * seek and reset output-POC and field-pairing state, then delegate the
 * generic buffer flush to ff_mpeg_flush(). */
3069 H264Context *h= avctx->priv_data;
3071 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3072 if(h->delayed_pic[i])
3073 h->delayed_pic[i]->reference= 0;
3074 h->delayed_pic[i]= NULL;
/* INT_MIN so any real POC compares as newer than "nothing output yet". */
3076 h->outputed_poc= INT_MIN;
3078 if(h->s.current_picture_ptr)
3079 h->s.current_picture_ptr->reference= 0;
3080 h->s.first_field= 0;
3081 ff_mpeg_flush(avctx);
3085 * Find a Picture in the short term reference list by frame number.
3086 * @param frame_num frame number to search for
3087 * @param idx the index into h->short_ref where returned picture is found
3088 * undefined if no picture found.
3089 * @return pointer to the found picture, or NULL if no pic with the provided
3090 * frame number is found
3092 static Picture * find_short(H264Context *h, int frame_num, int *idx){
/* Linear search of the short-term reference list for frame_num (see the
 * doc comment above). On a hit, *idx is presumably set and the picture
 * returned in the lines missing from this extract. */
3093 MpegEncContext * const s = &h->s;
3096 for(i=0; i<h->short_ref_count; i++){
3097 Picture *pic= h->short_ref[i];
3098 if(s->avctx->debug&FF_DEBUG_MMCO)
3099 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3100 if(pic->frame_num == frame_num) {
3109 * Remove a picture from the short term reference list by its index in
3110 * that list. This does no checking on the provided index; it is assumed
3111 * to be valid. Other list entries are shifted down.
3112 * @param i index into h->short_ref of picture to remove.
3114 static void remove_short_at_index(H264Context *h, int i){
/* Removes entry i from short_ref[] (index assumed valid, see comment
 * above) and shifts the remaining entries down to keep the list dense. */
3115 assert(i >= 0 && i < h->short_ref_count);
3116 h->short_ref[i]= NULL;
/* memmove: source and destination overlap within short_ref[]. */
3117 if (--h->short_ref_count)
3118 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3123 * @return the removed picture or NULL if an error occurs
3125 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
/* Finds the short-term reference with the given frame_num, strips the
 * reference bits in ref_mask, and drops it from the list once fully
 * unreferenced. Return statement lies in the lines missing here. */
3126 MpegEncContext * const s = &h->s;
3130 if(s->avctx->debug&FF_DEBUG_MMCO)
3131 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3133 pic = find_short(h, frame_num, &i);
3135 if(unreference_pic(h, pic, ref_mask))
3136 remove_short_at_index(h, i);
3143 * Remove a picture from the long term reference list by its index in
3145 * @return the removed picture or NULL if an error occurs
3147 static Picture * remove_long(H264Context *h, int i, int ref_mask){
/* Unreferences long_ref[i] by ref_mask; when the picture becomes fully
 * unreferenced it is removed from the long-term list and the count is
 * decremented. Return statement is in the lines missing here. */
3150 pic= h->long_ref[i];
3152 if(unreference_pic(h, pic, ref_mask)){
3153 assert(h->long_ref[i]->long_ref == 1);
3154 h->long_ref[i]->long_ref= 0;
3155 h->long_ref[i]= NULL;
3156 h->long_ref_count--;
3164 * print short term list
3166 static void print_short_term(H264Context *h) {
/* Debug dump of the short-term reference list; only active when
 * FF_DEBUG_MMCO is enabled. */
3168 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3169 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3170 for(i=0; i<h->short_ref_count; i++){
3171 Picture *pic= h->short_ref[i];
3172 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3178 * print long term list
3180 static void print_long_term(H264Context *h) {
/* Debug dump of the (sparse, 16-slot) long-term reference array; only
 * active when FF_DEBUG_MMCO is enabled. A NULL-check for empty slots
 * presumably sits in the missing line before the av_log. */
3182 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3183 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3184 for(i = 0; i < 16; i++){
3185 Picture *pic= h->long_ref[i];
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3194 * Executes the reference picture marking (memory management control operations).
3196 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
/* Applies the decoded memory-management control operations (MMCO) to
 * the short/long reference lists, per H.264 section 8.2.5. With
 * mmco_count==0 the sliding-window / second-field bookkeeping at the
 * bottom still runs. NOTE(review): gapped extract — case terminators,
 * else branches and the return are not all visible. */
3197 MpegEncContext * const s = &h->s;
3199 int current_ref_assigned=0;
3202 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3203 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3205 for(i=0; i<mmco_count; i++){
3206 int structure, frame_num;
3207 if(s->avctx->debug&FF_DEBUG_MMCO)
3208 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* Opcodes that name a short-term picture: resolve pic_num to a
 * frame_num + field structure and look it up first. */
3210 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3211 || mmco[i].opcode == MMCO_SHORT2LONG){
3212 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3213 pic = find_short(h, frame_num, &j);
/* pic==NULL path (NULL-check in missing lines, presumably): only an
 * error if this is not a SHORT2LONG that already hit the target slot. */
3215 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3216 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3217 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3222 switch(mmco[i].opcode){
3223 case MMCO_SHORT2UNUSED:
3224 if(s->avctx->debug&FF_DEBUG_MMCO)
3225 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the *named* field(s) only. */
3226 remove_short(h, frame_num, structure ^ PICT_FRAME);
3228 case MMCO_SHORT2LONG:
/* Move a short-term picture into long-term slot long_arg, evicting
 * any different occupant first. */
3229 if (h->long_ref[mmco[i].long_arg] != pic)
3230 remove_long(h, mmco[i].long_arg, 0);
3232 remove_short_at_index(h, j);
3233 h->long_ref[ mmco[i].long_arg ]= pic;
3234 if (h->long_ref[ mmco[i].long_arg ]){
3235 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3236 h->long_ref_count++;
3239 case MMCO_LONG2UNUSED:
3240 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3241 pic = h->long_ref[j];
3243 remove_long(h, j, structure ^ PICT_FRAME);
3244 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3245 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO_LONG (mark current picture long-term) — case label presumably
 * in the missing lines above this comment block. */
3248 // Comment below left from previous code as it is an interesting note.
3249 /* First field in pair is in short term list or
3250 * at a different long term index.
3251 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3252 * Report the problem and keep the pair where it is,
3253 * and mark this field valid.
3256 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3257 remove_long(h, mmco[i].long_arg, 0);
3259 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3260 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3261 h->long_ref_count++;
3264 s->current_picture_ptr->reference |= s->picture_structure;
3265 current_ref_assigned=1;
3267 case MMCO_SET_MAX_LONG:
3268 assert(mmco[i].long_arg <= 16);
3269 // just remove the long term which index is greater than new max
3270 for(j = mmco[i].long_arg; j<16; j++){
3271 remove_long(h, j, 0);
/* MMCO_RESET (case label presumably in missing lines): drop every
 * reference and restart POC/frame_num numbering from zero. */
3275 while(h->short_ref_count){
3276 remove_short(h, h->short_ref[0]->frame_num, 0);
3278 for(j = 0; j < 16; j++) {
3279 remove_long(h, j, 0);
3281 s->current_picture_ptr->poc=
3282 s->current_picture_ptr->field_poc[0]=
3283 s->current_picture_ptr->field_poc[1]=
3287 s->current_picture_ptr->frame_num= 0;
3293 if (!current_ref_assigned) {
3294 /* Second field of complementary field pair; the first field of
3295 * which is already referenced. If short referenced, it
3296 * should be first entry in short_ref. If not, it must exist
3297 * in long_ref; trying to put it on the short list here is an
3298 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3300 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3301 /* Just mark the second field valid */
3302 s->current_picture_ptr->reference = PICT_FRAME;
3303 } else if (s->current_picture_ptr->long_ref) {
3304 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3305 "assignment for second field "
3306 "in complementary field pair "
3307 "(first field is long term)\n");
/* Default path: insert the current picture at the head of short_ref,
 * removing any stale entry with the same frame_num first. */
3309 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3311 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3314 if(h->short_ref_count)
3315 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3317 h->short_ref[0]= s->current_picture_ptr;
3318 h->short_ref_count++;
3319 s->current_picture_ptr->reference |= s->picture_structure;
3323 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3325 /* We have too many reference frames, probably due to corrupted
3326 * stream. Need to discard one frame. Prevents overrun of the
3327 * short_ref and long_ref buffers.
3329 av_log(h->s.avctx, AV_LOG_ERROR,
3330 "number of reference frames exceeds max (probably "
3331 "corrupt input), discarding one\n");
3333 if (h->long_ref_count && !h->short_ref_count) {
/* Only long-term refs exist: discard the first occupied slot
 * (NULL-skip presumably in the missing lines). */
3334 for (i = 0; i < 16; ++i)
3339 remove_long(h, i, 0);
/* Otherwise drop the oldest short-term reference. */
3341 pic = h->short_ref[h->short_ref_count - 1];
3342 remove_short(h, pic->frame_num, 0);
3346 print_short_term(h);
3351 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get the implicit "long-term if flagged" handling; otherwise
 * either the explicit MMCO list or an automatic sliding-window
 * SHORT2UNUSED entry is generated.
 * NOTE(review): gapped extract — mmco count assignment / return missing. */
3352 MpegEncContext * const s = &h->s;
3356 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; stored as 0/-1 in broken_link. */
3357 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag path (if in missing lines, presumably):
 * keep the IDR picture as long-term index 0. */
3359 h->mmco[0].opcode= MMCO_LONG;
3360 h->mmco[0].long_arg= 0;
3364 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3365 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3366 MMCOOpcode opcode= get_ue_golomb(gb);
3368 h->mmco[i].opcode= opcode;
3369 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 → absolute pic num, wrapped. */
3370 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3371 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3372 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3376 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3377 unsigned int long_arg= get_ue_golomb(gb);
/* Bound check: 16 long-term frame slots, 32 when addressing fields. */
3378 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3379 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3382 h->mmco[i].long_arg= long_arg;
3385 if(opcode > (unsigned)MMCO_LONG){
3386 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3389 if(opcode == MMCO_END)
/* else branch (sliding window, presumably starting in missing lines): */
3394 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* DPB full ⇒ synthesize a SHORT2UNUSED for the oldest short-term ref,
 * unless this is the second field of an already-referenced pair. */
3396 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3397 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3398 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3399 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3401 if (FIELD_PICTURE) {
/* Field coding: pic nums are doubled; unreference both fields. */
3402 h->mmco[0].short_pic_num *= 2;
3403 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3404 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3414 static int init_poc(H264Context *h){
/* Computes the picture order count (POC) of the current picture for all
 * three poc_type modes of H.264 section 8.2.1, filling field_poc[0/1]
 * and cur->poc. NOTE(review): gapped extract — field_poc[0] assignment
 * for poc_type 0 and the poc_type 2 tail are in missing lines. */
3415 MpegEncContext * const s = &h->s;
3416 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3418 Picture *cur = s->current_picture_ptr;
3420 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped since last picture ⇒ advance the offset. */
3421 if(h->frame_num < h->prev_frame_num)
3422 h->frame_num_offset += max_frame_num;
3424 if(h->sps.poc_type==0){
3425 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Standard poc_msb wrap detection from the lsb delta (8.2.1.1). */
3427 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3428 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3429 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3430 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3432 h->poc_msb = h->prev_poc_msb;
3433 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3435 field_poc[1] = h->poc_msb + h->poc_lsb;
3436 if(s->picture_structure == PICT_FRAME)
3437 field_poc[1] += h->delta_poc_bottom;
3438 }else if(h->sps.poc_type==1){
3439 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3442 if(h->sps.poc_cycle_length != 0)
3443 abs_frame_num = h->frame_num_offset + h->frame_num;
/* Non-reference pictures count one less in the cycle (8.2.1.2). */
3447 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3450 expected_delta_per_poc_cycle = 0;
3451 for(i=0; i < h->sps.poc_cycle_length; i++)
3452 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3454 if(abs_frame_num > 0){
3455 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3456 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3458 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3459 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3460 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3464 if(h->nal_ref_idc == 0)
3465 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3467 field_poc[0] = expectedpoc + h->delta_poc[0];
3468 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3470 if(s->picture_structure == PICT_FRAME)
3471 field_poc[1] += h->delta_poc[1];
/* poc_type 2: POC derived directly from frame numbering (else branch
 * presumably opens in a missing line). */
3473 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Publish the per-field POCs appropriate for the picture structure. */
3482 if(s->picture_structure != PICT_BOTTOM_FIELD)
3483 s->current_picture_ptr->field_poc[0]= field_poc[0];
3484 if(s->picture_structure != PICT_TOP_FIELD)
3485 s->current_picture_ptr->field_poc[1]= field_poc[1];
3486 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3493 * initialize scan tables
3495 static void init_scan_tables(H264Context *h){
/* Builds the zigzag/field scan tables. When the DSP uses the C IDCT the
 * reference scan order is copied verbatim; otherwise each index is
 * permuted (macro T) to match the transposed coefficient layout the
 * optimized IDCTs expect. The *_q0 pointers select the untransformed
 * tables for lossless (transform-bypass) blocks. */
3496 MpegEncContext * const s = &h->s;
3498 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3499 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3500 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3502 for(i=0; i<16; i++){
3503 #define T(x) (x>>2) | ((x<<2) & 0xF)
3504 h->zigzag_scan[i] = T(zigzag_scan[i]);
3505 h-> field_scan[i] = T( field_scan[i]);
3509 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3510 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3511 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3512 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3513 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3515 for(i=0; i<64; i++){
3516 #define T(x) (x>>3) | ((x&7)<<3)
3517 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3518 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3519 h->field_scan8x8[i] = T(field_scan8x8[i]);
3520 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3524 if(h->sps.transform_bypass){ //FIXME same ugly
3525 h->zigzag_scan_q0 = zigzag_scan;
3526 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3527 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3528 h->field_scan_q0 = field_scan;
3529 h->field_scan8x8_q0 = field_scan8x8;
3530 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3532 h->zigzag_scan_q0 = h->zigzag_scan;
3533 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3534 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3535 h->field_scan_q0 = h->field_scan;
3536 h->field_scan8x8_q0 = h->field_scan8x8;
3537 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3542 * Replicates H264 "master" context to thread contexts.
3544 static void clone_slice(H264Context *dst, H264Context *src)
/* Copies the per-frame state a worker thread needs from the master
 * context: current-picture pointers, line sizes, POC/frame_num history,
 * reference lists and dequant tables. Shallow copies only — pointers
 * still alias the master's data. */
3546 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3547 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3548 dst->s.current_picture = src->s.current_picture;
3549 dst->s.linesize = src->s.linesize;
3550 dst->s.uvlinesize = src->s.uvlinesize;
3551 dst->s.first_field = src->s.first_field;
3553 dst->prev_poc_msb = src->prev_poc_msb;
3554 dst->prev_poc_lsb = src->prev_poc_lsb;
3555 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3556 dst->prev_frame_num = src->prev_frame_num;
3557 dst->short_ref_count = src->short_ref_count;
3559 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3560 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3561 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3562 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3564 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3565 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3569 * decodes a slice header.
3570 * This will also call MPV_common_init() and frame_start() as needed.
3572 * @param h h264context
3573 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3575 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3577 static int decode_slice_header(H264Context *h, H264Context *h0){
/* Parses a full slice header (see the doc comment above): slice type,
 * PPS/SPS activation, picture geometry, field/frame structure, POC
 * syntax, reference counts + reordering, weight tables, ref pic
 * marking, CABAC init, QP and deblocking parameters, then slice/thread
 * bookkeeping. NOTE(review): gapped extract — many closing braces,
 * else branches and error returns are in lines missing from this view. */
3578 MpegEncContext * const s = &h->s;
3579 MpegEncContext * const s0 = &h0->s;
3580 unsigned int first_mb_in_slice;
3581 unsigned int pps_id;
3582 int num_ref_idx_active_override_flag;
3583 unsigned int slice_type, tmp, i, j;
3584 int default_ref_list_done = 0;
3585 int last_pic_structure;
3587 s->dropable= h->nal_ref_idc == 0;
/* FAST flag on non-reference slices: cheaper 2-tap qpel filters. */
3589 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3590 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3591 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3593 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3594 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3597 first_mb_in_slice= get_ue_golomb(&s->gb);
3599 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3600 h0->current_slice = 0;
3601 if (!s0->first_field)
3602 s->current_picture_ptr= NULL;
3605 slice_type= get_ue_golomb(&s->gb);
3607 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "same type for the whole picture" (fixed). */
3612 h->slice_type_fixed=1;
3614 h->slice_type_fixed=0;
3616 slice_type= golomb_to_pict_type[ slice_type ];
3617 if (slice_type == FF_I_TYPE
3618 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3619 default_ref_list_done = 1;
3621 h->slice_type= slice_type;
3622 h->slice_type_nos= slice_type & 3;
3624 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3625 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3626 av_log(h->s.avctx, AV_LOG_ERROR,
3627 "B picture before any references, skipping\n");
/* --- PPS/SPS activation --- */
3631 pps_id= get_ue_golomb(&s->gb);
3632 if(pps_id>=MAX_PPS_COUNT){
3633 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3636 if(!h0->pps_buffers[pps_id]) {
3637 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3640 h->pps= *h0->pps_buffers[pps_id];
3642 if(!h0->sps_buffers[h->pps.sps_id]) {
3643 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3646 h->sps = *h0->sps_buffers[h->pps.sps_id];
3648 if(h == h0 && h->dequant_coeff_pps != pps_id){
3649 h->dequant_coeff_pps = pps_id;
3650 init_dequant_tables(h);
/* --- picture geometry from the SPS --- */
3653 s->mb_width= h->sps.mb_width;
3654 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3656 h->b_stride= s->mb_width*4;
3657 h->b8_stride= s->mb_width*2;
3659 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3660 if(h->sps.frame_mbs_only_flag)
3661 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3663 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3665 if (s->context_initialized
3666 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3668 return -1; // width / height changed during parallelized decoding
3670 flush_dpb(s->avctx);
3673 if (!s->context_initialized) {
3675 return -1; // we cant (re-)initialize context during parallel decoding
3676 if (MPV_common_init(s) < 0)
3680 init_scan_tables(h);
/* Allocate and seed per-thread slice contexts from the master. */
3683 for(i = 1; i < s->avctx->thread_count; i++) {
3685 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3686 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3687 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3690 init_scan_tables(c);
3694 for(i = 0; i < s->avctx->thread_count; i++)
3695 if(context_init(h->thread_context[i]) < 0)
3698 s->avctx->width = s->width;
3699 s->avctx->height = s->height;
3700 s->avctx->sample_aspect_ratio= h->sps.sar;
3701 if(!s->avctx->sample_aspect_ratio.den)
3702 s->avctx->sample_aspect_ratio.den = 1;
3704 if(h->sps.timing_info_present_flag){
3705 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Old x264 builds wrote half the correct time_scale; compensate. */
3706 if(h->x264_build > 0 && h->x264_build < 44)
3707 s->avctx->time_base.den *= 2;
3708 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3709 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3713 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* --- frame/field structure --- */
3716 h->mb_aff_frame = 0;
3717 last_pic_structure = s0->picture_structure;
3718 if(h->sps.frame_mbs_only_flag){
3719 s->picture_structure= PICT_FRAME;
3721 if(get_bits1(&s->gb)) { //field_pic_flag
3722 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3724 s->picture_structure= PICT_FRAME;
3725 h->mb_aff_frame = h->sps.mb_aff;
3728 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3730 if(h0->current_slice == 0){
/* Fill frame_num gaps with synthesized references (sliding window). */
3731 while(h->frame_num != h->prev_frame_num &&
3732 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3733 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3735 h->prev_frame_num++;
3736 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3737 s->current_picture_ptr->frame_num= h->prev_frame_num;
3738 execute_ref_pic_marking(h, NULL, 0);
3741 /* See if we have a decoded first field looking for a pair... */
3742 if (s0->first_field) {
3743 assert(s0->current_picture_ptr);
3744 assert(s0->current_picture_ptr->data[0]);
3745 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3747 /* figure out if we have a complementary field pair */
3748 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3750 * Previous field is unmatched. Don't display it, but let it
3751 * remain for reference if marked as such.
3753 s0->current_picture_ptr = NULL;
3754 s0->first_field = FIELD_PICTURE;
3757 if (h->nal_ref_idc &&
3758 s0->current_picture_ptr->reference &&
3759 s0->current_picture_ptr->frame_num != h->frame_num) {
3761 * This and previous field were reference, but had
3762 * different frame_nums. Consider this field first in
3763 * pair. Throw away previous field except for reference
3766 s0->first_field = 1;
3767 s0->current_picture_ptr = NULL;
3770 /* Second field in complementary pair */
3771 s0->first_field = 0;
3776 /* Frame or first field in a potentially complementary pair */
3777 assert(!s0->current_picture_ptr);
3778 s0->first_field = FIELD_PICTURE;
3781 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3782 s0->first_field = 0;
3789 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3791 assert(s->mb_num == s->mb_width * s->mb_height);
3792 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3793 first_mb_in_slice >= s->mb_num){
3794 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3797 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3798 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3799 if (s->picture_structure == PICT_BOTTOM_FIELD)
3800 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3801 assert(s->mb_y < s->mb_height);
/* Field pictures double the pic-num space (spec 8.2.4.1). */
3803 if(s->picture_structure==PICT_FRAME){
3804 h->curr_pic_num= h->frame_num;
3805 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3807 h->curr_pic_num= 2*h->frame_num + 1;
3808 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3811 if(h->nal_unit_type == NAL_IDR_SLICE){
3812 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC syntax elements --- */
3815 if(h->sps.poc_type==0){
3816 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3818 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3819 h->delta_poc_bottom= get_se_golomb(&s->gb);
3823 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3824 h->delta_poc[0]= get_se_golomb(&s->gb);
3826 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3827 h->delta_poc[1]= get_se_golomb(&s->gb);
3832 if(h->pps.redundant_pic_cnt_present){
3833 h->redundant_pic_count= get_ue_golomb(&s->gb);
3836 //set defaults, might be overridden a few lines later
3837 h->ref_count[0]= h->pps.ref_count[0];
3838 h->ref_count[1]= h->pps.ref_count[1];
3840 if(h->slice_type_nos != FF_I_TYPE){
3841 if(h->slice_type_nos == FF_B_TYPE){
3842 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3844 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3846 if(num_ref_idx_active_override_flag){
3847 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3848 if(h->slice_type_nos==FF_B_TYPE)
3849 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned trick: catches both 0 and >32 in one compare. */
3851 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3852 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3853 h->ref_count[0]= h->ref_count[1]= 1;
3857 if(h->slice_type_nos == FF_B_TYPE)
/* --- reference list construction / reordering --- */
3864 if(!default_ref_list_done){
3865 fill_default_ref_list(h);
3868 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3871 if(h->slice_type_nos!=FF_I_TYPE){
3872 s->last_picture_ptr= &h->ref_list[0][0];
3873 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3875 if(h->slice_type_nos==FF_B_TYPE){
3876 s->next_picture_ptr= &h->ref_list[1][0];
3877 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* --- weighted prediction setup --- */
3880 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3881 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3882 pred_weight_table(h);
3883 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3884 implicit_weight_table(h);
/* Reference marking runs on the master context h0. */
3889 decode_ref_pic_marking(h0, &s->gb);
3892 fill_mbaff_ref_list(h);
3894 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3895 direct_dist_scale_factor(h);
3896 direct_ref_list_init(h);
3898 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3899 tmp = get_ue_golomb(&s->gb);
3901 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3904 h->cabac_init_idc= tmp;
/* --- QP and SP/SI extras --- */
3907 h->last_qscale_diff = 0;
3908 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3910 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3914 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3915 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3916 //FIXME qscale / qp ... stuff
3917 if(h->slice_type == FF_SP_TYPE){
3918 get_bits1(&s->gb); /* sp_for_switch_flag */
3920 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3921 get_se_golomb(&s->gb); /* slice_qs_delta */
/* --- deblocking filter parameters --- */
3924 h->deblocking_filter = 1;
3925 h->slice_alpha_c0_offset = 0;
3926 h->slice_beta_offset = 0;
3927 if( h->pps.deblocking_filter_parameters_present ) {
3928 tmp= get_ue_golomb(&s->gb);
3930 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3933 h->deblocking_filter= tmp;
/* Bitstream idc 0 means "filter on", 1 means "off" — swap to bool. */
3934 if(h->deblocking_filter < 2)
3935 h->deblocking_filter^= 1; // 1<->0
3937 if( h->deblocking_filter ) {
3938 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3939 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3943 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3944 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3945 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3946 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3947 h->deblocking_filter= 0;
3949 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3950 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3951 /* Cheat slightly for speed:
3952 Do not bother to deblock across slices. */
3953 h->deblocking_filter = 2;
/* Cross-slice deblocking forces single-threaded slice decoding. */
3955 h0->max_contexts = 1;
3956 if(!h0->single_decode_warning) {
3957 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3958 h0->single_decode_warning = 1;
3961 return 1; // deblocking switched inside frame
/* NOTE(review): the two lines below reference undeclared symbols and a
 * literal '?' bit count — presumably disabled (e.g. inside #if 0) in
 * the lines missing from this extract; confirm against the full file. */
3966 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3967 slice_group_change_cycle= get_bits(&s->gb, ?);
3970 h0->last_slice_type = slice_type;
3971 h->slice_num = ++h0->current_slice;
3972 if(h->slice_num >= MAX_SLICES){
3973 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Precompute ref-index → frame-id map used by the loop filter. */
3977 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3981 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3982 +(h->ref_list[j][i].reference&3);
3985 for(i=16; i<48; i++)
3986 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3987 +(h->ref_list[j][i].reference&3);
3990 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3991 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3993 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3994 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3996 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3998 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3999 pps_id, h->frame_num,
4000 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4001 h->ref_count[0], h->ref_count[1],
4003 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4005 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4006 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4016 static inline int get_level_prefix(GetBitContext *gb){
/* Reads a CAVLC level_prefix: counts leading zero bits before the first
 * one bit using the cached bitstream reader macros, then skips them.
 * The av_log line is trace output (presumably compiled conditionally in
 * the missing lines); the return is also not visible in this extract. */
4020 OPEN_READER(re, gb);
4021 UPDATE_CACHE(re, gb);
4022 buf=GET_CACHE(re, gb);
/* Position of the first set bit from the MSB side. */
4024 log= 32 - av_log2(buf);
4026 print_bin(buf>>(32-log), log);
4027 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4030 LAST_SKIP_BITS(re, gb, log);
4031 CLOSE_READER(re, gb);
4036 static inline int get_dct8x8_allowed(H264Context *h){
/* An 8x8 transform is allowed only if no sub-macroblock partition is
 * smaller than 8x8. The four 16-bit sub_mb_type entries are tested at
 * once through a single 64-bit mask; DIRECT2 partitions are tolerated
 * only when the SPS sets direct_8x8_inference_flag. */
4037 if(h->sps.direct_8x8_inference_flag)
4038 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4040 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4044  * decodes a residual block.
4045  * @param n block index
4046  * @param scantable scantable
4047  * @param max_coeff number of coefficients in the block
4048  * @return <0 if an error occurred
4050 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
// CAVLC residual decoding (coeff_token, trailing ones, level prefixes/
// suffixes, total_zeros, run_before), per H.264 spec section 9.2.
// NOTE(review): this view is elided; several declarations (e.g. level[],
// dquant paths) and closing braces are missing between the visible lines.
4051     MpegEncContext * const s = &h->s;
4052     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4054     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4056     //FIXME put trailing_onex into the context
// coeff_token VLC choice depends on the predicted non-zero count of
// neighbouring blocks (chroma DC uses its own dedicated table).
4058     if(n == CHROMA_DC_BLOCK_INDEX){
4059         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4060         total_coeff= coeff_token>>2;
4062         if(n == LUMA_DC_BLOCK_INDEX){
4063             total_coeff= pred_non_zero_count(h, 0);
4064             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4065             total_coeff= coeff_token>>2;
4067             total_coeff= pred_non_zero_count(h, n);
4068             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4069             total_coeff= coeff_token>>2;
4070             h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4074     //FIXME set last_non_zero?
// Bitstream sanity check: a corrupt stream may signal more coefficients
// than the block can hold.
4078     if(total_coeff > (unsigned)max_coeff) {
4079         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// trailing_ones is encoded in the low 2 bits of coeff_token.
4083     trailing_ones= coeff_token&3;
4084     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4085     assert(total_coeff<=16);
// Read up to 3 sign bits for the trailing +/-1 coefficients in one peek.
4087     i = show_bits(gb, 3);
4088     skip_bits(gb, trailing_ones);
4089     level[0] = 1-((i&4)>>1);
4090     level[1] = 1-((i&2)   );
4091     level[2] = 1-((i&1)<<1);
// Decode the remaining (non trailing-one) levels.
4093     if(trailing_ones<total_coeff) {
4094         int level_code, mask;
4095         int suffix_length = total_coeff > 10 && trailing_ones < 3;
4096         int prefix= get_level_prefix(gb);
4098         //first coefficient has suffix_length equal to 0 or 1
4099         if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4101                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4103                 level_code= (prefix<<suffix_length); //part
4104         }else if(prefix==14){
4106                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4108                 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape code with a longer suffix.
4110             level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4111             if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4113                 level_code += (1<<(prefix-3))-4096;
4116         if(trailing_ones < 3) level_code += 2;
// Zig-zag the unsigned level_code back into a signed level.
4121         mask= -(level_code&1);
4122         level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4124         //remaining coefficients have suffix_length > 0
4125         for(i=trailing_ones+1;i<total_coeff;i++) {
4126             static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4127             prefix = get_level_prefix(gb);
4129                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4131                 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4133                     level_code += (1<<(prefix-3))-4096;
4135             mask= -(level_code&1);
4136             level[i]= (((2+level_code)>>1) ^ mask) - mask;
// Grow suffix_length once the level magnitude exceeds the threshold.
4137             if(level_code > suffix_limit[suffix_length])
// If the block is full there can be no zeros between coefficients.
4142     if(total_coeff == max_coeff)
4145         if(n == CHROMA_DC_BLOCK_INDEX)
4146             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4148             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4151     coeff_num = zeros_left + total_coeff - 1;
4152     j = scantable[coeff_num];
// Two placement loops: without qmul (DC blocks) and with dequantization.
4154         block[j] = level[0];
4155         for(i=1;i<total_coeff;i++) {
4158             else if(zeros_left < 7){
4159                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4161                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4163             zeros_left -= run_before;
4164             coeff_num -= 1 + run_before;
4165             j= scantable[ coeff_num ];
// Dequantizing variant: (level * qmul + 32) >> 6 rounds to nearest.
4170         block[j] = (level[0] * qmul[j] + 32)>>6;
4171         for(i=1;i<total_coeff;i++) {
4174             else if(zeros_left < 7){
4175                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4177                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4179             zeros_left -= run_before;
4180             coeff_num -= 1 + run_before;
4181             j= scantable[ coeff_num ];
4183             block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means run_before values overran the block.
4188         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4195 static void predict_field_decoding_flag(H264Context *h){
// For MBAFF skipped MB pairs: infer the field/frame decoding flag from the
// left neighbour if it belongs to this slice, else from the top neighbour.
// NOTE(review): the fallback value when neither neighbour is available is
// on an elided line (after L4201) and not visible here.
4196     MpegEncContext * const s = &h->s;
4197     const int mb_xy= h->mb_xy;
4198     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4199                 ? s->current_picture.mb_type[mb_xy-1]
4200                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4201                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4203     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4207  * decodes a P_SKIP or B_SKIP macroblock
4209 static void decode_mb_skip(H264Context *h){
// Fills in all decoder state for a skipped MB: no residual, motion either
// predicted (P_SKIP) or taken from direct prediction (B_SKIP).
4210     MpegEncContext * const s = &h->s;
4211     const int mb_xy= h->mb_xy;
// A skipped MB has no non-zero coefficients anywhere.
4214     memset(h->non_zero_count[mb_xy], 0, 16);
4215     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4218         mb_type|= MB_TYPE_INTERLACED;
4220     if( h->slice_type_nos == FF_B_TYPE )
4222         // just for fill_caches. pred_direct_motion will set the real mb_type
4223         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4225         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4226         pred_direct_motion(h, &mb_type);
4227         mb_type|= MB_TYPE_SKIP;
// P_SKIP path: 16x16 partition with the predicted P-skip motion vector.
4232         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4234         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4235         pred_pskip_motion(h, &mx, &my);
4236         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4237         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4240     write_back_motion(h, mb_type);
4241     s->current_picture.mb_type[mb_xy]= mb_type;
4242     s->current_picture.qscale_table[mb_xy]= s->qscale;
4243     h->slice_table[ mb_xy ]= h->slice_num;
4244     h->prev_mb_skipped= 1;
4248  * decodes a macroblock
4249  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4251 static int decode_mb_cavlc(H264Context *h){
// Top-level CAVLC macroblock decode: skip run, mb_type, intra prediction
// modes or inter motion/reference info, CBP, dquant, then the residual
// blocks (luma DC/AC, chroma DC/AC).
// NOTE(review): this view is elided — many declarations, else-branches and
// closing braces between the visible lines are missing.
4252     MpegEncContext * const s = &h->s;
4254     int partition_count;
4255     unsigned int mb_type, cbp;
4256     int dct8x8_allowed= h->pps.transform_8x8_mode;
4258     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4260     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4261     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip handling (P/B slices only) ---
4263     if(h->slice_type_nos != FF_I_TYPE){
4264         if(s->mb_skip_run==-1)
4265             s->mb_skip_run= get_ue_golomb(&s->gb);
4267         if (s->mb_skip_run--) {
4268             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4269                 if(s->mb_skip_run==0)
4270                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4272                     predict_field_decoding_flag(h);
4279         if( (s->mb_y&1) == 0 )
4280             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4283     h->prev_mb_skipped= 0;
// --- mb_type: remap the Golomb index through the per-slice-type table ---
4285     mb_type= get_ue_golomb(&s->gb);
4286     if(h->slice_type_nos == FF_B_TYPE){
4288             partition_count= b_mb_type_info[mb_type].partition_count;
4289             mb_type=         b_mb_type_info[mb_type].type;
4292             goto decode_intra_mb;
4294     }else if(h->slice_type_nos == FF_P_TYPE){
4296             partition_count= p_mb_type_info[mb_type].partition_count;
4297             mb_type=         p_mb_type_info[mb_type].type;
4300             goto decode_intra_mb;
4303        assert(h->slice_type_nos == FF_I_TYPE);
4304         if(h->slice_type == FF_SI_TYPE && mb_type)
4308             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4312         cbp= i_mb_type_info[mb_type].cbp;
4313         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4314         mb_type= i_mb_type_info[mb_type].type;
4318         mb_type |= MB_TYPE_INTERLACED;
4320     h->slice_table[ mb_xy ]= h->slice_num;
// --- IPCM: raw samples follow, byte-aligned ---
4322     if(IS_INTRA_PCM(mb_type)){
4325         // We assume these blocks are very rare so we do not optimize it.
4326         align_get_bits(&s->gb);
4328         // The pixels are stored in the same order as levels in h->mb array.
4329         for(x=0; x < (CHROMA ? 384 : 256); x++){
4330             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4333         // In deblocking, the quantizer is 0
4334         s->current_picture.qscale_table[mb_xy]= 0;
4335         // All coeffs are present
4336         memset(h->non_zero_count[mb_xy], 16, 16);
4338         s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF uses doubled reference counts while decoding a field MB of a pair.
4343         h->ref_count[0] <<= 1;
4344         h->ref_count[1] <<= 1;
4347     fill_caches(h, mb_type, 0);
// --- intra prediction modes ---
4350     if(IS_INTRA(mb_type)){
4352 //            init_top_left_availability(h);
4353         if(IS_INTRA4x4(mb_type)){
4356                 if(dct8x8_allowed && get_bits1(&s->gb)){
4357                     mb_type |= MB_TYPE_8x8DCT;
4361 //                fill_intra4x4_pred_table(h);
4362             for(i=0; i<16; i+=di){
4363                 int mode= pred_intra_mode(h, i);
4365                 if(!get_bits1(&s->gb)){
4366                     const int rem_mode= get_bits(&s->gb, 3);
// rem_intra4x4_pred_mode skips over the predicted mode.
4367                     mode = rem_mode + (rem_mode >= mode);
4371                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4373                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4375             write_back_intra_pred_mode(h);
4376             if( check_intra4x4_pred_mode(h) < 0)
4379             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4380             if(h->intra16x16_pred_mode < 0)
4384             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4387             h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: sub_mb_types, refs, then per-sub-block MVs ---
4389     }else if(partition_count==4){
4390         int i, j, sub_partition_count[4], list, ref[2][4];
4392         if(h->slice_type_nos == FF_B_TYPE){
4394                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4395                 if(h->sub_mb_type[i] >=13){
4396                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4399                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4400                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4402             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4403                || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4404                 pred_direct_motion(h, &mb_type);
4405                 h->ref_cache[0][scan8[4]] =
4406                 h->ref_cache[1][scan8[4]] =
4407                 h->ref_cache[0][scan8[12]] =
4408                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4411             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4413                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4414                 if(h->sub_mb_type[i] >=4){
4415                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4418                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4419                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4423         for(list=0; list<h->list_count; list++){
4424             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4426                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4427                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4428                     unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4430                         av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4442             dct8x8_allowed = get_dct8x8_allowed(h);
4444         for(list=0; list<h->list_count; list++){
4446                 if(IS_DIRECT(h->sub_mb_type[i])) {
4447                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4450                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4451                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4453                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4454                     const int sub_mb_type= h->sub_mb_type[i];
4455                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4456                     for(j=0; j<sub_partition_count[i]; j++){
4458                         const int index= 4*i + block_width*j;
4459                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4460                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4461                         mx += get_se_golomb(&s->gb);
4462                         my += get_se_golomb(&s->gb);
4463                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV into the cache according to the sub-block shape.
4465                         if(IS_SUB_8X8(sub_mb_type)){
4467                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4469                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4470                         }else if(IS_SUB_8X4(sub_mb_type)){
4471                             mv_cache[ 1 ][0]= mx;
4472                             mv_cache[ 1 ][1]= my;
4473                         }else if(IS_SUB_4X8(sub_mb_type)){
4474                             mv_cache[ 8 ][0]= mx;
4475                             mv_cache[ 8 ][1]= my;
4477                         mv_cache[ 0 ][0]= mx;
4478                         mv_cache[ 0 ][1]= my;
4481                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- direct-predicted whole MB ---
4487     }else if(IS_DIRECT(mb_type)){
4488         pred_direct_motion(h, &mb_type);
4489         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter partitions ---
4491         int list, mx, my, i;
4492         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4493         if(IS_16X16(mb_type)){
4494             for(list=0; list<h->list_count; list++){
4496                     if(IS_DIR(mb_type, 0, list)){
4497                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4498                         if(val >= h->ref_count[list]){
4499                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4503                     val= LIST_NOT_USED&0xFF;
4504                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4506             for(list=0; list<h->list_count; list++){
4508                 if(IS_DIR(mb_type, 0, list)){
4509                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4510                     mx += get_se_golomb(&s->gb);
4511                     my += get_se_golomb(&s->gb);
4512                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4514                     val= pack16to32(mx,my);
4517                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4520         else if(IS_16X8(mb_type)){
4521             for(list=0; list<h->list_count; list++){
4524                     if(IS_DIR(mb_type, i, list)){
4525                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4526                         if(val >= h->ref_count[list]){
4527                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4531                         val= LIST_NOT_USED&0xFF;
4532                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4535             for(list=0; list<h->list_count; list++){
4538                     if(IS_DIR(mb_type, i, list)){
4539                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4540                         mx += get_se_golomb(&s->gb);
4541                         my += get_se_golomb(&s->gb);
4542                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4544                         val= pack16to32(mx,my);
4547                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4551             assert(IS_8X16(mb_type));
4552             for(list=0; list<h->list_count; list++){
4555                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4556                         val= get_te0_golomb(&s->gb, h->ref_count[list]);
4557                         if(val >= h->ref_count[list]){
4558                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4562                         val= LIST_NOT_USED&0xFF;
4563                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4566             for(list=0; list<h->list_count; list++){
4569                     if(IS_DIR(mb_type, i, list)){
4570                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4571                         mx += get_se_golomb(&s->gb);
4572                         my += get_se_golomb(&s->gb);
4573                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4575                         val= pack16to32(mx,my);
4578                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4584     if(IS_INTER(mb_type))
4585         write_back_motion(h, mb_type);
// --- coded block pattern ---
4587     if(!IS_INTRA16x16(mb_type)){
4588         cbp= get_ue_golomb(&s->gb);
4590             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4595             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4596             else                     cbp= golomb_to_inter_cbp   [cbp];
4598             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4599             else                     cbp= golomb_to_inter_cbp_gray[cbp];
4604     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4605         if(get_bits1(&s->gb)){
4606             mb_type |= MB_TYPE_8x8DCT;
4607             h->cbp_table[mb_xy]= cbp;
4610     s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual decoding ---
4612     if(cbp || IS_INTRA16x16(mb_type)){
4613         int i8x8, i4x4, chroma_idx;
4615         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4616         const uint8_t *scan, *scan8x8, *dc_scan;
4618 //        fill_non_zero_count_cache(h);
// Field MBs use field scans; the _q0 variants apply when qscale is 0.
4620         if(IS_INTERLACED(mb_type)){
4621             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4622             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4623             dc_scan= luma_dc_field_scan;
4625             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4626             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4627             dc_scan= luma_dc_zigzag_scan;
4630         dquant= get_se_golomb(&s->gb);
4632         if( dquant > 25 || dquant < -26 ){
4633             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics.
4637         s->qscale += dquant;
4638         if(((unsigned)s->qscale) > 51){
4639             if(s->qscale<0) s->qscale+= 52;
4640             else            s->qscale-= 52;
4643         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4644         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4645         if(IS_INTRA16x16(mb_type)){
4646             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4647                 return -1; //FIXME continue if partitioned and other return -1 too
4650             assert((cbp&15) == 0 || (cbp&15) == 15);
4653                 for(i8x8=0; i8x8<4; i8x8++){
4654                     for(i4x4=0; i4x4<4; i4x4++){
4655                         const int index= i4x4 + 4*i8x8;
// Intra16x16 AC: 15 coefficients, scan starts after the DC position.
4656                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4662                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4665             for(i8x8=0; i8x8<4; i8x8++){
4666                 if(cbp & (1<<i8x8)){
4667                     if(IS_8x8DCT(mb_type)){
4668                         DCTELEM *buf = &h->mb[64*i8x8];
4670                         for(i4x4=0; i4x4<4; i4x4++){
4671                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4672                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4675                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4676                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4678                         for(i4x4=0; i4x4<4; i4x4++){
4679                             const int index= i4x4 + 4*i8x8;
4681                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4687                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4688                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2) blocks, no dequantization table here.
4694             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4695                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4701             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4702                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4703                 for(i4x4=0; i4x4<4; i4x4++){
4704                     const int index= 16 + 4*chroma_idx + i4x4;
4705                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4711             uint8_t * const nnz= &h->non_zero_count_cache[0];
4712             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4713             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4716         uint8_t * const nnz= &h->non_zero_count_cache[0];
4717         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4718         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4719         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4721     s->current_picture.qscale_table[mb_xy]= s->qscale;
4722     write_back_non_zero_count(h);
// Undo the MBAFF ref_count doubling from above.
4725         h->ref_count[0] >>= 1;
4726         h->ref_count[1] >>= 1;
4732 static int decode_cabac_field_decoding_flag(H264Context *h) {
// CABAC mb_field_decoding_flag: context is the number of available
// neighbouring MB pairs (left and above) that are field-coded.
4733     MpegEncContext * const s = &h->s;
4734     const int mb_x = s->mb_x;
// Top MB of the current pair (MBAFF pairs are aligned to even rows).
4735     const int mb_y = s->mb_y & ~1;
4736     const int mba_xy = mb_x - 1 +  mb_y   *s->mb_stride;
4737     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
4739     unsigned int ctx = 0;
4741     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4744     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4748     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4751 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
// Decodes the intra mb_type via CABAC: returns 0 for I_4x4, 25 for I_PCM,
// or 1..24 for the I_16x16 variants (encoding pred mode, cbp_chroma and
// cbp_luma into the index). ctx_base selects the state offset so the same
// routine serves I, P and B slices; intra_slice enables neighbour contexts.
4752     uint8_t *state= &h->cabac_state[ctx_base];
4756         MpegEncContext * const s = &h->s;
4757         const int mba_xy = h->left_mb_xy[0];
4758         const int mbb_xy = h->top_mb_xy;
// ctx increments for each non-I4x4 neighbour in the same slice.
4760         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4762         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4764         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4765             return 0;   /* I4x4 */
4768         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4769             return 0;   /* I4x4 */
4772     if( get_cabac_terminate( &h->cabac ) )
4773         return 25;  /* PCM */
4775     mb_type = 1; /* I16x16 */
4776     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4777     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4778         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4779     mb_type +=  2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4780     mb_type +=  1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4784 static int decode_cabac_mb_type( H264Context *h ) {
// Decodes the CABAC mb_type for the current slice type. I slices defer to
// decode_cabac_intra_mb_type; P slices use states 14-17; B slices decode a
// prefix tree over states 27+ctx..27+5 (ctx from DIRECT-coded neighbours).
4785     MpegEncContext * const s = &h->s;
4787     if( h->slice_type_nos == FF_I_TYPE ) {
4788         return decode_cabac_intra_mb_type(h, 3, 1);
4789     } else if( h->slice_type_nos == FF_P_TYPE ) {
4790         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4792             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4793                 /* P_L0_D16x16, P_8x8 */
4794                 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4796                 /* P_L0_D8x16, P_L0_D16x8 */
4797                 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra MB in a P slice: intra types start after the 5 P types.
4800             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4803         const int mba_xy = h->left_mb_xy[0];
4804         const int mbb_xy = h->top_mb_xy;
4807         assert(h->slice_type_nos == FF_B_TYPE);
4809         if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4811         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4814         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4815             return 0; /* B_Direct_16x16 */
4817         if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4818             return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix selects among the remaining bi-predictive / split types.
4821         bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4822         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4823         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4824         bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4826             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4827         else if( bits == 13 ) {
// Intra MB in a B slice: intra types start after the 23 B types.
4828             return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4829         } else if( bits == 14 )
4830             return 11; /* B_L1_L0_8x16 */
4831         else if( bits == 15 )
4832             return 22; /* B_8x8 */
4834         bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4835         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4839 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
// CABAC mb_skip_flag: context counts the non-skipped left/top neighbours.
// MBAFF needs special neighbour derivation (field/frame pair alignment).
4840     MpegEncContext * const s = &h->s;
4844     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4845         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4848                       && h->slice_table[mba_xy] == h->slice_num
4849                       && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
// Use the bottom MB of the left pair when field/frame coding matches.
4850             mba_xy += s->mb_stride;
4852             mbb_xy = mb_xy - s->mb_stride;
4854                           && h->slice_table[mbb_xy] == h->slice_num
4855                           && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4856                 mbb_xy -= s->mb_stride;
4858             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4860         int mb_xy = h->mb_xy;
// In field pictures the vertical neighbour is two MB rows away.
4862         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4865     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4867     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices use a different context group than P slices (states 11+/...).
4870     if( h->slice_type_nos == FF_B_TYPE )
4872     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4875 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
// CABAC intra4x4 pred mode: one flag selects the predicted mode; otherwise
// a 3-bit rem_intra4x4_pred_mode is read and remapped to skip pred_mode.
4878     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4881     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4882     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4883     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Values >= pred_mode are shifted up by one so pred_mode is never re-coded.
4885     if( mode >= pred_mode )
4891 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
// CABAC intra_chroma_pred_mode: context from whether the left/top
// neighbours used a non-zero chroma pred mode; then a truncated-unary
// code (max 3) with shared state 64+3 for the suffix bins.
4892     const int mba_xy = h->left_mb_xy[0];
4893     const int mbb_xy = h->top_mb_xy;
4897     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4898     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4901     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4904     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4907     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4909     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4915 static int decode_cabac_mb_cbp_luma( H264Context *h) {
// CABAC luma CBP: 4 bits, one per 8x8 block; each bin's context depends on
// whether the 8x8 neighbours (left/top, possibly from neighbouring MBs
// via left_cbp/top_cbp) were coded. -1 (all bits set) when the neighbour
// MB is outside this slice, so its blocks count as coded.
4916     int cbp_b, cbp_a, ctx, cbp = 0;
4918     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4919     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;
4921     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4922     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4923     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
4924     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4925     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
4926     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4927     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
4928     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4931 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// CABAC chroma CBP: 0 = none, 1 = DC only, 2 = DC+AC. Two bins with
// contexts from the neighbours' chroma CBP (bits 4-5 of left/top cbp).
4935     cbp_a = (h->left_cbp>>4)&0x03;
4936     cbp_b = (h-> top_cbp>>4)&0x03;
4939     if( cbp_a > 0 ) ctx++;
4940     if( cbp_b > 0 ) ctx += 2;
4941     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin distinguishes DC-only (1) from DC+AC (2); ctx offset by 4.
4945     if( cbp_a == 2 ) ctx++;
4946     if( cbp_b == 2 ) ctx += 2;
4947     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
4949 static int decode_cabac_mb_dqp( H264Context *h) {
// CABAC mb_qp_delta: unary-coded magnitude, then mapped to a signed delta
// (even counts -> negative). First-bin context depends on whether the
// previous MB had a non-zero delta.
4950     int   ctx= h->last_qscale_diff != 0;
4953     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4956         if(val > 102) //prevent infinite loop
4961         return   (val + 1)>>1 ;
4963         return -((val + 1)>>1);
4965 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
// CABAC P-slice sub_mb_type: small binary tree over states 21-23,
// selecting among the four P sub-partition shapes.
4966     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4968     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4970     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4974 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
// CABAC B-slice sub_mb_type: prefix tree over states 36-39 covering
// B_Direct_8x8 (0), B_L0/L1_8x8 (1-2), the 4x4 types (11-12) and the
// remaining bi-/split types accumulated into 'type'.
4976     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4977         return 0;   /* B_Direct_8x8 */
4978     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4979         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4981     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4982         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4983             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4986     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4987     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
4991 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
// CABAC transform_size_8x8_flag; context from the number of neighbouring
// MBs that already use the 8x8 transform (neighbor_transform_size).
4992     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
4995 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
// CABAC ref_idx: context from the left/top cached ref indices (B slices
// ignore DIRECT-predicted neighbours), then a unary code over states 54+.
4996     int refa = h->ref_cache[list][scan8[n] - 1];
4997     int refb = h->ref_cache[list][scan8[n] - 8];
5001     if( h->slice_type_nos == FF_B_TYPE) {
5002         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5004         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5013     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Hard bound to protect against corrupt streams looping forever.
5019     if(ref >= 32 /*h->ref_list[list]*/){
5026 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
// CABAC motion vector difference, component l (0=x ctxbase 40, 1=y 47).
// Context derives from the summed neighbour MVD magnitudes (amvd); the
// value is UEG3-coded: unary prefix (max 9), exp-Golomb suffix, sign bit.
5027     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5028                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5029     int ctxbase = (l == 0) ? 40 : 47;
5031     int ctx = (amvd>2) + (amvd>32);
5033     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5038     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Bypass-coded exp-Golomb escape once the unary prefix saturates.
5046         while( get_cabac_bypass( &h->cabac ) ) {
5050                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5055             if( get_cabac_bypass( &h->cabac ) )
5059     return get_cabac_bypass_sign( &h->cabac, -mvd );
5062 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Derives the coded_block_flag context: 2 neighbour flags (left/top) taken
// from cbp_table bits for DC blocks, or from the non_zero_count cache for
// AC/4x4 blocks; final context index is grouped by block category.
5068             nza = h->left_cbp&0x100;
5069             nzb = h-> top_cbp&0x100;
5071             nza = (h->left_cbp>>(6+idx))&0x01;
5072             nzb = (h-> top_cbp>>(6+idx))&0x01;
5075         assert(cat == 1 || cat == 2 || cat == 4);
5076         nza = h->non_zero_count_cache[scan8[idx] - 1];
5077         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5086     return ctx + 4 * cat;
// Maps an 8x8 scan position (0..62) to the context offset used for the
// last_significant_coeff_flag in 8x8 blocks (H.264 spec table 9-43 region
// grouping). ASM-aligned because the x86 significance decoder indexes it.
5089 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5090     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5091     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5092     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5093     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5096 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
// CABAC residual decoding (spec 9.3.2.3): coded_block_flag, significance
// map, then coefficient levels/signs decoded in reverse scan order.
// is_dc is a compile-time constant here so the DC/AC variants specialize.
// NOTE(review): this view is elided; some declarations (index[], last,
// coeff_abs, node_ctx) and braces are missing between the visible lines.
5097     static const int significant_coeff_flag_offset[2][6] = {
5098       { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5099       { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5101     static const int last_coeff_flag_offset[2][6] = {
5102       { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5103       { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5105     static const int coeff_abs_level_m1_offset[6] = {
5106         227+0, 227+10, 227+20, 227+30, 227+39, 426
5108     static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5109       { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5110         4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5111         7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5112        12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5113       { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5114         6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5115         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5116         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5118     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5119      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5120      * map node ctx => cabac ctx for level=1 */
5121     static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5122     /* map node ctx => cabac ctx for level>1 */
5123     static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5124     static const uint8_t coeff_abs_level_transition[2][8] = {
5125     /* update node ctx after decoding a level=1 */
5126         { 1, 2, 3, 3, 4, 5, 6, 7 },
5127     /* update node ctx after decoding a level>1 */
5128         { 4, 4, 4, 4, 5, 6, 7, 7 }
5134     int coeff_count = 0;
5137     uint8_t *significant_coeff_ctx_base;
5138     uint8_t *last_coeff_ctx_base;
5139     uint8_t *abs_level_m1_ctx_base;
// Work on a stack copy of the CABAC context so hot accesses stay local;
// the state is written back to h->cabac before every return.
5142 #define CABAC_ON_STACK
5144 #ifdef CABAC_ON_STACK
5147     cc.range     = h->cabac.range;
5148     cc.low       = h->cabac.low;
5149     cc.bytestream= h->cabac.bytestream;
5151 #define CC &h->cabac
5155     /* cat: 0-> DC 16x16  n = 0
5156      *      1-> AC 16x16  n = luma4x4idx
5157      *      2-> Luma4x4   n = luma4x4idx
5158      *      3-> DC Chroma n = iCbCr
5159      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5160      *      5-> Luma8x8   n = 4 * luma8x8idx
5163     /* read coded block flag */
// 8x8 luma blocks (cat 5) carry no coded_block_flag of their own.
5164     if( is_dc || cat != 5 ) {
5165         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5167                 h->non_zero_count_cache[scan8[n]] = 0;
5169 #ifdef CABAC_ON_STACK
5170             h->cabac.range     = cc.range     ;
5171             h->cabac.low       = cc.low       ;
5172             h->cabac.bytestream= cc.bytestream;
// Context group selection depends on field coding (MB_FIELD) and cat.
5178     significant_coeff_ctx_base = h->cabac_state
5179         + significant_coeff_flag_offset[MB_FIELD][cat];
5180     last_coeff_ctx_base = h->cabac_state
5181         + last_coeff_flag_offset[MB_FIELD][cat];
5182     abs_level_m1_ctx_base = h->cabac_state
5183         + coeff_abs_level_m1_offset[cat];
5185     if( !is_dc && cat == 5 ) {
// Significance map: for each position, significant_coeff_flag and, if set,
// last_significant_coeff_flag; positions are collected in index[].
5186 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5187         for(last= 0; last < coefs; last++) { \
5188             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5189             if( get_cabac( CC, sig_ctx )) { \
5190                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5191                 index[coeff_count++] = last; \
5192                 if( get_cabac( CC, last_ctx ) ) { \
5198         if( last == max_coeff -1 ) {\
5199             index[coeff_count++] = last;\
5201         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5202 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5203         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5205         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5207         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5209         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5212     assert(coeff_count > 0);
// Record which blocks are coded (cbp_table for DC; nnz caches for AC).
5216             h->cbp_table[h->mb_xy] |= 0x100;
5218             h->cbp_table[h->mb_xy] |= 0x40 << n;
5221             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5223             assert( cat == 1 || cat == 2 || cat == 4 );
5224             h->non_zero_count_cache[scan8[n]] = coeff_count;
// Level decoding, reverse scan order; node_ctx tracks the level-context
// state machine described above the coeff_abs_level* tables.
5229         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5231         int j= scantable[index[--coeff_count]];
5233         if( get_cabac( CC, ctx ) == 0 ) {
5234             node_ctx = coeff_abs_level_transition[0][node_ctx];
5236                 block[j] = get_cabac_bypass_sign( CC, -1);
5238                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5242             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5243             node_ctx = coeff_abs_level_transition[1][node_ctx];
// Truncated-unary magnitude up to 15, then bypass exp-Golomb escape.
5245             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5249             if( coeff_abs >= 15 ) {
5251                 while( get_cabac_bypass( CC ) ) {
5257                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5263                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5265                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5268     } while( coeff_count );
5269 #ifdef CABAC_ON_STACK
5270             h->cabac.range     = cc.range     ;
5271             h->cabac.low       = cc.low       ;
5272             h->cabac.bytestream= cc.bytestream;
/* Thin dispatch layer around decode_cabac_residual_internal().
 * In non-CONFIG_SMALL builds, two specialized wrappers are compiled so the
 * constant is_dc flag (last argument) can be folded into the inlined body:
 * 1 for DC blocks, 0 otherwise.  Categories 0 and 3 are the DC cases,
 * matching the cat == 0 || cat == 3 test below. */
5277 #ifndef CONFIG_SMALL
5278 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5279 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5282 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5283 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Entry point used by decode_mb_cabac(): the CONFIG_SMALL build makes a
 * single call computing is_dc at run time; otherwise it dispatches to the
 * specialized wrappers above. */
5287 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5289 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5291 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5292 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the raster-scan indices of the top and left neighbour
 * macroblocks (h->top_mb_xy, h->left_mb_xy[0]) for the current MB.
 * The plain frame-coded case is mb_xy - mb_stride / mb_xy - 1; the MBAFF
 * logic below corrects both when the current macroblock pair or its
 * neighbours differ in frame/field coding.  NOTE(review): several lines of
 * this function are elided in this view — confirm the exact branch
 * structure against the full source. */
5296 static inline void compute_mb_neighbors(H264Context *h)
5298 MpegEncContext * const s = &h->s;
5299 const int mb_xy = h->mb_xy;
5300 h->top_mb_xy = mb_xy - s->mb_stride;
5301 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF works in macroblock pairs: pair_xy is the top MB of the current
 * pair, top_pair_xy the pair directly above it. */
5303 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5304 const int top_pair_xy = pair_xy - s->mb_stride;
5305 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5306 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5307 const int curr_mb_frame_flag = !MB_FIELD;
5308 const int bottom = (s->mb_y & 1);
/* Field-coded current MB whose relevant top neighbour lies one more MB row up. */
5310 if (!curr_mb_frame_flag && (bottom || !top_mb_frame_flag)){
5311 h->top_mb_xy -= s->mb_stride;
/* Left neighbour pair coded in the other mode: take its pair-top MB. */
5313 if (left_mb_frame_flag != curr_mb_frame_flag) {
5314 h->left_mb_xy[0] = pair_xy - 1;
/* Field pictures (non-MBAFF): the top neighbour is one further stride up. */
5316 } else if (FIELD_PICTURE) {
5317 h->top_mb_xy -= s->mb_stride;
5323 * decodes a macroblock
5324 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5326 static int decode_mb_cabac(H264Context *h) {
5327 MpegEncContext * const s = &h->s;
5329 int mb_type, partition_count, cbp = 0;
5330 int dct8x8_allowed= h->pps.transform_8x8_mode;
5332 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5334 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* Inter (P/B) slices: decode mb_skip_flag first.  Under MBAFF the field
 * decoding flag of a skipped pair depends on the following MB, hence the
 * look-ahead handling below. */
5335 if( h->slice_type_nos != FF_I_TYPE ) {
5337 /* a skipped mb needs the aff flag from the following mb */
5338 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5339 predict_field_decoding_flag(h);
5340 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5341 skip = h->next_mb_skipped;
5343 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5344 /* read skip flags */
5346 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5347 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5348 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5349 if(!h->next_mb_skipped)
5350 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped macroblock: reset the per-MB tables (the rest of the skip path
 * is elided in this view). */
5355 h->cbp_table[mb_xy] = 0;
5356 h->chroma_pred_mode_table[mb_xy] = 0;
5357 h->last_qscale_diff = 0;
5364 if( (s->mb_y&1) == 0 )
5366 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5369 h->prev_mb_skipped = 0;
/* Locate neighbours (needed as CABAC contexts), then decode mb_type. */
5371 compute_mb_neighbors(h);
5372 mb_type = decode_cabac_mb_type( h );
5373 assert(mb_type >= 0);
5375 if( h->slice_type_nos == FF_B_TYPE ) {
5377 partition_count= b_mb_type_info[mb_type].partition_count;
5378 mb_type= b_mb_type_info[mb_type].type;
5381 goto decode_intra_mb;
5383 } else if( h->slice_type_nos == FF_P_TYPE ) {
5385 partition_count= p_mb_type_info[mb_type].partition_count;
5386 mb_type= p_mb_type_info[mb_type].type;
5389 goto decode_intra_mb;
/* Intra path (reached directly for I/SI slices or via goto decode_intra_mb
 * for intra MBs inside P/B slices). */
5392 if(h->slice_type == FF_SI_TYPE && mb_type)
5394 assert(h->slice_type_nos == FF_I_TYPE);
5396 partition_count = 0;
5397 cbp= i_mb_type_info[mb_type].cbp;
5398 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5399 mb_type= i_mb_type_info[mb_type].type;
5402 mb_type |= MB_TYPE_INTERLACED;
5404 h->slice_table[ mb_xy ]= h->slice_num;
/* PCM macroblock: pixel samples are stored raw (uncompressed) in the
 * bitstream right after the current CABAC position. */
5406 if(IS_INTRA_PCM(mb_type)) {
5409 // We assume these blocks are very rare so we do not optimize it.
5410 // FIXME The two following lines get the bitstream position in the cabac
5411 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5412 ptr= h->cabac.bytestream;
5413 if(h->cabac.low&0x1) ptr--;
5415 if(h->cabac.low&0x1FF) ptr--;
5418 // The pixels are stored in the same order as levels in h->mb array.
5419 memcpy(h->mb, ptr, 256); ptr+=256;
5421 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC engine after the raw samples. */
5424 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5426 // All blocks are present
5427 h->cbp_table[mb_xy] = 0x1ef;
5428 h->chroma_pred_mode_table[mb_xy] = 0;
5429 // In deblocking, the quantizer is 0
5430 s->current_picture.qscale_table[mb_xy]= 0;
5431 // All coeffs are present
5432 memset(h->non_zero_count[mb_xy], 16, 16);
5433 s->current_picture.mb_type[mb_xy]= mb_type;
5434 h->last_qscale_diff = 0;
/* NOTE(review): ref counts are doubled here and halved again at the end of
 * the function — presumably for field decoding of an MBAFF pair; the
 * guarding condition is elided in this view, confirm in the full source. */
5439 h->ref_count[0] <<= 1;
5440 h->ref_count[1] <<= 1;
5443 fill_caches(h, mb_type, 0);
/* Intra prediction modes: per-4x4 modes (optionally with the 8x8
 * transform, one mode per 8x8 block) or the 16x16 mode, plus chroma. */
5445 if( IS_INTRA( mb_type ) ) {
5447 if( IS_INTRA4x4( mb_type ) ) {
5448 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5449 mb_type |= MB_TYPE_8x8DCT;
5450 for( i = 0; i < 16; i+=4 ) {
5451 int pred = pred_intra_mode( h, i );
5452 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5453 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5456 for( i = 0; i < 16; i++ ) {
5457 int pred = pred_intra_mode( h, i );
5458 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5460 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5463 write_back_intra_pred_mode(h);
5464 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5466 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5467 if( h->intra16x16_pred_mode < 0 ) return -1;
5470 h->chroma_pred_mode_table[mb_xy] =
5471 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5473 pred_mode= check_intra_pred_mode( h, pred_mode );
5474 if( pred_mode < 0 ) return -1;
5475 h->chroma_pred_mode= pred_mode;
/* Inter MB split into four 8x8 partitions: decode sub_mb_types, then
 * reference indices, then motion vectors per sub-partition. */
5477 } else if( partition_count == 4 ) {
5478 int i, j, sub_partition_count[4], list, ref[2][4];
5480 if( h->slice_type_nos == FF_B_TYPE ) {
5481 for( i = 0; i < 4; i++ ) {
5482 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5483 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5484 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5486 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5487 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5488 pred_direct_motion(h, &mb_type);
5489 h->ref_cache[0][scan8[4]] =
5490 h->ref_cache[1][scan8[4]] =
5491 h->ref_cache[0][scan8[12]] =
5492 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5493 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5494 for( i = 0; i < 4; i++ )
5495 if( IS_DIRECT(h->sub_mb_type[i]) )
5496 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5500 for( i = 0; i < 4; i++ ) {
5501 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5502 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5503 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices, one per used 8x8 partition (direct partitions skip). */
5507 for( list = 0; list < h->list_count; list++ ) {
5508 for( i = 0; i < 4; i++ ) {
5509 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5510 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5511 if( h->ref_count[list] > 1 ){
5512 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5513 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5514 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5522 h->ref_cache[list][ scan8[4*i]+1 ]=
5523 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5528 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors: predict, add decoded mvd, and store both mv and mvd in
 * the caches (mvd is needed as CABAC context for later MBs). */
5530 for(list=0; list<h->list_count; list++){
5532 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5533 if(IS_DIRECT(h->sub_mb_type[i])){
5534 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5538 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5539 const int sub_mb_type= h->sub_mb_type[i];
5540 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5541 for(j=0; j<sub_partition_count[i]; j++){
5544 const int index= 4*i + block_width*j;
5545 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5546 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5547 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5549 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5550 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5551 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the vector over all 4x4 cells the sub-partition covers. */
5553 if(IS_SUB_8X8(sub_mb_type)){
5555 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5557 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5560 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5562 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5563 }else if(IS_SUB_8X4(sub_mb_type)){
5564 mv_cache[ 1 ][0]= mx;
5565 mv_cache[ 1 ][1]= my;
5567 mvd_cache[ 1 ][0]= mx - mpx;
5568 mvd_cache[ 1 ][1]= my - mpy;
5569 }else if(IS_SUB_4X8(sub_mb_type)){
5570 mv_cache[ 8 ][0]= mx;
5571 mv_cache[ 8 ][1]= my;
5573 mvd_cache[ 8 ][0]= mx - mpx;
5574 mvd_cache[ 8 ][1]= my - mpy;
5576 mv_cache[ 0 ][0]= mx;
5577 mv_cache[ 0 ][1]= my;
5579 mvd_cache[ 0 ][0]= mx - mpx;
5580 mvd_cache[ 0 ][1]= my - mpy;
/* Partition unused in this list: zero the 8x8 block's mv/mvd as 32-bit
 * stores (two int16 components at a time). */
5583 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5584 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5585 p[0] = p[1] = p[8] = p[9] = 0;
5586 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5590 } else if( IS_DIRECT(mb_type) ) {
5591 pred_direct_motion(h, &mb_type);
5592 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5593 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5594 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16 / 16x8 / 8x16 partitions: decode reference indices first, then
 * the motion vectors for each partition and list. */
5596 int list, mx, my, i, mpx, mpy;
5597 if(IS_16X16(mb_type)){
5598 for(list=0; list<h->list_count; list++){
5599 if(IS_DIR(mb_type, 0, list)){
5601 if(h->ref_count[list] > 1){
5602 ref= decode_cabac_mb_ref(h, list, 0);
5603 if(ref >= (unsigned)h->ref_count[list]){
5604 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5609 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5611 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5613 for(list=0; list<h->list_count; list++){
5614 if(IS_DIR(mb_type, 0, list)){
5615 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5617 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5618 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5619 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5621 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5622 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5624 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5627 else if(IS_16X8(mb_type)){
5628 for(list=0; list<h->list_count; list++){
5630 if(IS_DIR(mb_type, i, list)){
5632 if(h->ref_count[list] > 1){
5633 ref= decode_cabac_mb_ref( h, list, 8*i );
5634 if(ref >= (unsigned)h->ref_count[list]){
5635 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5640 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5642 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5645 for(list=0; list<h->list_count; list++){
5647 if(IS_DIR(mb_type, i, list)){
5648 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5649 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5650 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5651 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5653 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5654 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5656 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5657 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5662 assert(IS_8X16(mb_type));
5663 for(list=0; list<h->list_count; list++){
5665 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5667 if(h->ref_count[list] > 1){
5668 ref= decode_cabac_mb_ref( h, list, 4*i );
5669 if(ref >= (unsigned)h->ref_count[list]){
5670 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5675 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5677 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5680 for(list=0; list<h->list_count; list++){
5682 if(IS_DIR(mb_type, i, list)){
5683 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5684 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5685 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5687 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5688 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5689 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5691 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5692 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* Commit decoded motion to the frame-level arrays. */
5699 if( IS_INTER( mb_type ) ) {
5700 h->chroma_pred_mode_table[mb_xy] = 0;
5701 write_back_motion( h, mb_type );
/* Coded block pattern: decoded explicitly unless intra16x16 (whose cbp is
 * implied by the mb_type table lookup earlier). */
5704 if( !IS_INTRA16x16( mb_type ) ) {
5705 cbp = decode_cabac_mb_cbp_luma( h );
5707 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5710 h->cbp_table[mb_xy] = h->cbp = cbp;
5712 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5713 if( decode_cabac_mb_transform_size( h ) )
5714 mb_type |= MB_TYPE_8x8DCT;
5716 s->current_picture.mb_type[mb_xy]= mb_type;
/* Residual decoding: pick frame/field scan tables, apply mb_qp_delta,
 * then decode DC and AC coefficient blocks per the cbp bits. */
5718 if( cbp || IS_INTRA16x16( mb_type ) ) {
5719 const uint8_t *scan, *scan8x8, *dc_scan;
5720 const uint32_t *qmul;
5723 if(IS_INTERLACED(mb_type)){
5724 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5725 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5726 dc_scan= luma_dc_field_scan;
5728 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5729 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5730 dc_scan= luma_dc_zigzag_scan;
5733 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5734 if( dqp == INT_MIN ){
5735 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into [0,51] (arithmetic mod 52) after adding the delta. */
5739 if(((unsigned)s->qscale) > 51){
5740 if(s->qscale<0) s->qscale+= 52;
5741 else s->qscale-= 52;
5743 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5744 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra16x16: separate luma DC block (cat 0) plus 15-coeff AC blocks (cat 1). */
5746 if( IS_INTRA16x16( mb_type ) ) {
5748 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5749 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5752 qmul = h->dequant4_coeff[0][s->qscale];
5753 for( i = 0; i < 16; i++ ) {
5754 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5755 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5758 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Non-intra16x16 luma: per-8x8 either one 64-coeff block (cat 5, 8x8DCT)
 * or four 16-coeff 4x4 blocks (cat 2). */
5762 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5763 if( cbp & (1<<i8x8) ) {
5764 if( IS_8x8DCT(mb_type) ) {
5765 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5766 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5768 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5769 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5770 const int index = 4*i8x8 + i4x4;
5771 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5773 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5774 //STOP_TIMER("decode_residual")
5778 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5779 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC (cat 3) then chroma AC (cat 4) for both planes. */
5786 for( c = 0; c < 2; c++ ) {
5787 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5788 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5794 for( c = 0; c < 2; c++ ) {
5795 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5796 for( i = 0; i < 4; i++ ) {
5797 const int index = 16 + 4 * c + i;
5798 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5799 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5803 uint8_t * const nnz= &h->non_zero_count_cache[0];
5804 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5805 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No coded residual at all: clear every non-zero-count cache entry. */
5808 uint8_t * const nnz= &h->non_zero_count_cache[0];
5809 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5810 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5811 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5812 h->last_qscale_diff = 0;
5815 s->current_picture.qscale_table[mb_xy]= s->qscale;
5816 write_back_non_zero_count(h);
/* Undo the ref-count doubling from the start of the function (the guarding
 * condition is elided in this view). */
5819 h->ref_count[0] >>= 1;
5820 h->ref_count[1] >>= 1;
/* Deblocks one vertical (left-side) luma edge.  The alpha/beta clipping
 * thresholds come from tables biased by +52 so a negative qp + slice
 * offset still indexes valid table memory.  bS < 4 uses the normal filter
 * with per-4-pixel-group tc0 clipping; bS == 4 uses the strong intra
 * filter (the selecting branch is elided in this view). */
5827 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5828 const int index_a = qp + h->slice_alpha_c0_offset;
5829 const int alpha = (alpha_table+52)[index_a];
5830 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* One tc0 clipping value per 4-pixel group, chosen by boundary strength. */
5834 tc[0] = (tc0_table+52)[index_a][bS[0]];
5835 tc[1] = (tc0_table+52)[index_a][bS[1]];
5836 tc[2] = (tc0_table+52)[index_a][bS[2]];
5837 tc[3] = (tc0_table+52)[index_a][bS[3]];
5838 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
/* Strong (intra, bS == 4) filter: no tc clipping. */
5840 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge.  Same structure as filter_mb_edgev,
 * but chroma clips with tc0 + 1 and uses the chroma DSP filters. */
5843 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5844 const int index_a = qp + h->slice_alpha_c0_offset;
5845 const int alpha = (alpha_table+52)[index_a];
5846 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma uses tc0 + 1 as the clipping bound. */
5850 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5851 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5852 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5853 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5854 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Strong (intra, bS == 4) chroma filter. */
5856 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Vertical luma edge filter for MBAFF, processed one pixel row at a time:
 * the two macroblocks sharing the edge may be coded frame/field with
 * different QPs (qp[0]/qp[1]) and per-row boundary strengths (bS[8]), so
 * the vectorized DSP edge filters cannot be used. */
5860 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5862 for( i = 0; i < 16; i++, pix += stride) {
/* Map this pixel row to one of the 8 bS entries (mapping partly elided
 * in this view). */
5868 int bS_index = (i >> 1);
5871 bS_index |= (i & 1);
5874 if( bS[bS_index] == 0 ) {
/* Pick the QP of the MB this row belongs to, then look up thresholds. */
5878 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5879 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5880 alpha = (alpha_table+52)[index_a];
5881 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): tc0-clipped adjustment of the edge pixels. */
5883 if( bS[bS_index] < 4 ) {
5884 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5885 const int p0 = pix[-1];
5886 const int p1 = pix[-2];
5887 const int p2 = pix[-3];
5888 const int q0 = pix[0];
5889 const int q1 = pix[1];
5890 const int q2 = pix[2];
5892 if( FFABS( p0 - q0 ) < alpha &&
5893 FFABS( p1 - p0 ) < beta &&
5894 FFABS( q1 - q0 ) < beta ) {
5898 if( FFABS( p2 - p0 ) < beta ) {
5899 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5902 if( FFABS( q2 - q0 ) < beta ) {
5903 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5907 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5908 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5909 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5910 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4), per the spec's intra-edge filtering. */
5913 const int p0 = pix[-1];
5914 const int p1 = pix[-2];
5915 const int p2 = pix[-3];
5917 const int q0 = pix[0];
5918 const int q1 = pix[1];
5919 const int q2 = pix[2];
5921 if( FFABS( p0 - q0 ) < alpha &&
5922 FFABS( p1 - p0 ) < beta &&
5923 FFABS( q1 - q0 ) < beta ) {
/* Full 3-tap smoothing only when the step across the edge is small. */
5925 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5926 if( FFABS( p2 - p0 ) < beta)
5928 const int p3 = pix[-4];
5930 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5931 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5932 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5935 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5937 if( FFABS( q2 - q0 ) < beta)
5939 const int q3 = pix[3];
5941 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5942 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5943 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5946 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Large step: only p0/q0 are lightly filtered. */
5950 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5951 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5953 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Vertical chroma edge filter for MBAFF, one pixel row at a time (8 rows
 * for chroma).  As with the luma variant, each row can belong to either of
 * two differently-quantized MBs, so QP and bS are selected per row;
 * chroma clips with tc0 + 1. */
5958 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5960 for( i = 0; i < 8; i++, pix += stride) {
5968 if( bS[bS_index] == 0 ) {
/* Pick the owning MB's QP for this row, then threshold lookups. */
5972 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5973 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5974 alpha = (alpha_table+52)[index_a];
5975 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): clipped p0/q0 adjustment only (chroma). */
5977 if( bS[bS_index] < 4 ) {
5978 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
5979 const int p0 = pix[-1];
5980 const int p1 = pix[-2];
5981 const int q0 = pix[0];
5982 const int q1 = pix[1];
5984 if( FFABS( p0 - q0 ) < alpha &&
5985 FFABS( p1 - p0 ) < beta &&
5986 FFABS( q1 - q0 ) < beta ) {
5987 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5989 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5990 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5991 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4): unclipped 2-tap smoothing of p0/q0. */
5994 const int p0 = pix[-1];
5995 const int p1 = pix[-2];
5996 const int q0 = pix[0];
5997 const int q1 = pix[1];
5999 if( FFABS( p0 - q0 ) < alpha &&
6000 FFABS( p1 - p0 ) < beta &&
6001 FFABS( q1 - q0 ) < beta ) {
6003 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6004 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6005 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal (top-side) luma edge.  Mirror of
 * filter_mb_edgev; note the DSP naming convention: a horizontal edge is
 * filtered by the "v" (vertical-direction) loop filter. */
6011 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6012 const int index_a = qp + h->slice_alpha_c0_offset;
6013 const int alpha = (alpha_table+52)[index_a];
6014 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Per-4-pixel-group tc0 clipping values from the boundary strengths. */
6018 tc[0] = (tc0_table+52)[index_a][bS[0]];
6019 tc[1] = (tc0_table+52)[index_a][bS[1]];
6020 tc[2] = (tc0_table+52)[index_a][bS[2]];
6021 tc[3] = (tc0_table+52)[index_a][bS[3]];
6022 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
/* Strong (intra, bS == 4) filter. */
6024 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge.  Same shape as filter_mb_edgeh but
 * with chroma DSP filters and tc0 + 1 clipping. */
6028 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6029 const int index_a = qp + h->slice_alpha_c0_offset;
6030 const int alpha = (alpha_table+52)[index_a];
6031 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* Chroma clips with tc0 + 1. */
6035 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6036 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6037 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6038 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6039 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* Strong (intra, bS == 4) chroma filter. */
6041 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast deblocking path for one macroblock in the common non-MBAFF case.
 * Falls back to the generic filter_mb() for everything it cannot handle:
 * picture-border MBs, per-plane chroma QP offsets, missing DSP strength
 * helper, or deblocking mode 2 across slice boundaries. */
6045 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6046 MpegEncContext * const s = &h->s;
/* In a bottom-field picture the first stored MB row is row 1. */
6047 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6049 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Unsupported cases: delegate to the full (slow) filter and return. */
6053 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6054 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6055 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6056 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6057 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6060 assert(!FRAME_MBAFF);
6062 mb_type = s->current_picture.mb_type[mb_xy];
/* Edge QPs are the rounded average of the QPs of the two MBs sharing the
 * edge (qp0: left edge, qp1: top edge; qpc*: chroma equivalents). */
6063 qp = s->current_picture.qscale_table[mb_xy];
6064 qp0 = s->current_picture.qscale_table[mb_xy-1];
6065 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6066 qpc = get_chroma_qp( h, 0, qp );
6067 qpc0 = get_chroma_qp( h, 0, qp0 );
6068 qpc1 = get_chroma_qp( h, 0, qp1 );
6069 qp0 = (qp + qp0 + 1) >> 1;
6070 qp1 = (qp + qp1 + 1) >> 1;
6071 qpc0 = (qpc + qpc0 + 1) >> 1;
6072 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold no alpha/beta can trigger filtering: early out. */
6073 qp_thresh = 15 - h->slice_alpha_c0_offset;
6074 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6075 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed boundary strengths (4 on MB borders, 3 on inner edges;
 * horizontal MB border is 3 in field pictures). */
6078 if( IS_INTRA(mb_type) ) {
6079 int16_t bS4[4] = {4,4,4,4};
6080 int16_t bS3[4] = {3,3,3,3};
6081 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6082 if( IS_8x8DCT(mb_type) ) {
6083 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6084 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6085 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6086 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6088 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6089 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6090 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6091 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6092 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6093 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6094 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6095 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6097 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6098 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6099 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6100 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6101 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6102 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6103 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6104 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: derive per-edge strengths from nnz / refs / MVs via the DSP
 * helper; bSv aliases bS so whole 4-entry rows can be set as one uint64. */
6107 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6108 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6110 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6112 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6114 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6115 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6116 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6117 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6119 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6120 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6121 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6122 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbours force bS 4 (3 on the horizontal edge in field pics). */
6124 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6125 bSv[0][0] = 0x0004000400040004ULL;
6126 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6127 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Apply the computed strengths; dir 0 = vertical edges, dir 1 = horizontal. */
6129 #define FILTER(hv,dir,edge)\
6130 if(bSv[dir][edge]) {\
6131 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6133 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6134 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6140 } else if( IS_8x8DCT(mb_type) ) {
6160 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6161 MpegEncContext * const s = &h->s;
6163 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6164 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6165 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6166 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6167 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6169 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6170 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6171 // how often to recheck mv-based bS when iterating between edges
6172 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6173 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6174 // how often to recheck mv-based bS when iterating along each edge
6175 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6177 if (first_vertical_edge_done) {
6181 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6184 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6185 && !IS_INTERLACED(mb_type)
6186 && IS_INTERLACED(mbm_type)
6188 // This is a special case in the norm where the filtering must
6189 // be done twice (one each of the field) even if we are in a
6190 // frame macroblock.
6192 static const int nnz_idx[4] = {4,5,6,3};
6193 unsigned int tmp_linesize = 2 * linesize;
6194 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6195 int mbn_xy = mb_xy - 2 * s->mb_stride;
6200 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6201 if( IS_INTRA(mb_type) ||
6202 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6203 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6205 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6206 for( i = 0; i < 4; i++ ) {
6207 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6208 mbn_nnz[nnz_idx[i]] != 0 )
6214 // Do not use s->qscale as luma quantizer because it has not the same
6215 // value in IPCM macroblocks.
6216 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6217 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6218 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6219 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6220 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6221 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6222 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6223 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6230 for( edge = start; edge < edges; edge++ ) {
6231 /* mbn_xy: neighbor macroblock */
6232 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6233 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6234 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6238 if( (edge&1) && IS_8x8DCT(mb_type) )
6241 if( IS_INTRA(mb_type) ||
6242 IS_INTRA(mbn_type) ) {
6245 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6246 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6255 bS[0] = bS[1] = bS[2] = bS[3] = value;
6260 if( edge & mask_edge ) {
6261 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6264 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6265 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6268 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6269 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6270 int bn_idx= b_idx - (dir ? 8:1);
6273 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6274 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6275 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6276 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6279 if(h->slice_type_nos == FF_B_TYPE && v){
6281 for( l = 0; !v && l < 2; l++ ) {
6283 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6284 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6285 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6289 bS[0] = bS[1] = bS[2] = bS[3] = v;
6295 for( i = 0; i < 4; i++ ) {
6296 int x = dir == 0 ? edge : i;
6297 int y = dir == 0 ? i : edge;
6298 int b_idx= 8 + 4 + x + 8*y;
6299 int bn_idx= b_idx - (dir ? 8:1);
6301 if( h->non_zero_count_cache[b_idx] |
6302 h->non_zero_count_cache[bn_idx] ) {
6308 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6309 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6310 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6311 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6317 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6319 for( l = 0; l < 2; l++ ) {
6321 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6322 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6323 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6332 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6337 // Do not use s->qscale as luma quantizer because it has not the same
6338 // value in IPCM macroblocks.
6339 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6340 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6341 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6342 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6344 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6345 if( (edge&1) == 0 ) {
6346 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6347 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6348 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6349 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6352 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6353 if( (edge&1) == 0 ) {
6354 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6355 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6356 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6357 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Deblocking-filter entry point for one macroblock at (mb_x, mb_y).
 * Steps visible here: (1) conservative QP-threshold early-out, (2) patch
 * non_zero_count_cache for CAVLC 8x8 DCT (loop filter needs different NNZ
 * than residual decoding), (3) MBAFF special first vertical edge with 8 bS
 * values and per-field QPs, (4) delegate the regular edges to filter_mb_dir().
 * NOTE(review): this listing has elided lines (the embedded original line
 * numbers jump, e.g. 6368 -> 6371); code is kept verbatim. */
6363 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6364 MpegEncContext * const s = &h->s;
6365 const int mb_xy= mb_x + mb_y*s->mb_stride;
6366 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a smaller vertical MV difference threshold (field MVs) */
6367 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6368 int first_vertical_edge_done = 0;
6371 //for sufficiently low qp, filtering wouldn't do anything
6372 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6374 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6375 int qp = s->current_picture.qscale_table[mb_xy];
/* NOTE(review): the opening `if(qp <= qp_thresh ...` line appears elided here;
 * the averaged-QP comparisons against left/top neighbours remain: */
6377 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6378 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6383 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6384 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6385 int top_type, left_type[2];
6386 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6387 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6388 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the edge NNZ flags from the cbp of the 8x8-DCT neighbours */
6390 if(IS_8x8DCT(top_type)){
6391 h->non_zero_count_cache[4+8*0]=
6392 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6393 h->non_zero_count_cache[6+8*0]=
6394 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6396 if(IS_8x8DCT(left_type[0])){
6397 h->non_zero_count_cache[3+8*1]=
6398 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6400 if(IS_8x8DCT(left_type[1])){
6401 h->non_zero_count_cache[3+8*3]=
6402 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6405 if(IS_8x8DCT(mb_type)){
/* one cbp bit per 8x8 luma block, fanned out to all four 4x4 cache slots */
6406 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6407 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6409 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6410 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6412 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6413 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6415 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6416 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
/* NOTE(review): the MBAFF-edge condition header (`if(FRAME_MBAFF && ...`) is
 * elided in this listing; the remaining clauses are: */
6421 // left mb is in picture
6422 && h->slice_table[mb_xy-1] != 0xFFFF
6423 // and current and left pair do not have the same interlaced type
6424 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6425 // and left mb is in the same slice if deblocking_filter == 2
6426 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6427 /* First vertical edge is different in MBAFF frames
6428 * There are 8 different bS to compute and 2 different Qp
6430 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6431 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6436 int mb_qp, mbn0_qp, mbn1_qp;
6438 first_vertical_edge_done = 1;
6440 if( IS_INTRA(mb_type) )
6441 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6443 for( i = 0; i < 8; i++ ) {
/* map each of the 8 edge segments to the top/bottom left neighbour */
6444 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6446 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6448 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6449 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6450 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6452 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average luma/chroma QPs between current MB and each left-pair neighbour */
6459 mb_qp = s->current_picture.qscale_table[mb_xy];
6460 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6461 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6462 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6463 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6464 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6465 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6466 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6467 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6468 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6469 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6470 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6471 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6474 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6475 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6476 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6477 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6478 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* regular per-direction filtering; dir 0 = vertical edges, dir 1 = horizontal */
6482 for( dir = 0; dir < 2; dir++ )
6483 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6485 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6486 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode all macroblocks of one slice; runs as a (possibly threaded) worker.
 * Chooses the CABAC or CAVLC macroblock loop based on pps.cabac, reports
 * progress/errors to the error-resilience layer via ff_er_add_slice(), and
 * draws completed rows with ff_draw_horiz_band().
 * Returns 0 on normal slice end, -1 on error (per the visible paths).
 * NOTE(review): many lines are elided in this listing (original line numbers
 * jump); code kept verbatim, including one garbled line flagged below. */
6490 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6491 H264Context *h = *(void**)arg;
6492 MpegEncContext * const s = &h->s;
/* in partitioned (DP) streams only AC errors/end markers are reported */
6493 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6497 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6498 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6500 if( h->pps.cabac ) {
/* CABAC path: byte-align, then init the arithmetic decoder on the rest of the slice */
6504 align_get_bits( &s->gb );
6507 ff_init_cabac_states( &h->cabac);
6508 ff_init_cabac_decoder( &h->cabac,
6509 s->gb.buffer + get_bits_count(&s->gb)/8,
6510 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6511 /* calculate pre-state */
6512 for( i= 0; i < 460; i++ ) {
6514 if( h->slice_type_nos == FF_I_TYPE )
6515 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6517 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte as used by the CABAC decoder */
6520 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6522 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6527 int ret = decode_mb_cabac(h);
6529 //STOP_TIMER("decode_mb_cabac")
6531 if(ret>=0) hl_decode_mb(h);
6533 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6536 ret = decode_mb_cabac(h);
6538 if(ret>=0) hl_decode_mb(h);
6541 eos = get_cabac_terminate( &h->cabac );
/* allow up to 2 bytes of overread before declaring bitstream corruption */
6543 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6544 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6545 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6549 if( ++s->mb_x >= s->mb_width ) {
6551 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6553 if(FIELD_OR_MBAFF_PICTURE) {
6558 if( eos || s->mb_y >= s->mb_height ) {
6559 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6560 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6567 int ret = decode_mb_cavlc(h);
6569 if(ret>=0) hl_decode_mb(h);
6571 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6573 ret = decode_mb_cavlc(h);
6575 if(ret>=0) hl_decode_mb(h);
6580 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6581 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6586 if(++s->mb_x >= s->mb_width){
6588 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6590 if(FIELD_OR_MBAFF_PICTURE) {
6593 if(s->mb_y >= s->mb_height){
6594 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact bit-position match => clean slice end; otherwise report an error span */
6596 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6597 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6601 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6608 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6609 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6610 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6611 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6615 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the following loop uses get_bits_count(s->gb) (no '&'),
 * unlike the rest of this function — presumably dead/legacy code; confirm
 * against the repository before touching. */
6624 for(;s->mb_y < s->mb_height; s->mb_y++){
6625 for(;s->mb_x < s->mb_width; s->mb_x++){
6626 int ret= decode_mb(h);
6631 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6632 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6637 if(++s->mb_x >= s->mb_width){
6639 if(++s->mb_y >= s->mb_height){
6640 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6641 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6645 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line is garbled in this listing ("s->?gb", "s->gb?.");
 * it presumably mirrors line 6608's condition — verify against the repository. */
6652 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6653 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6658 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6665 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6668 return -1; //not reached
/* Parse a picture-timing SEI message (H.264 spec D.1.2/D.2.2).
 * Skips the CPB/DPB delay fields when HRD parameters are present, then reads
 * pic_struct and the optional per-clock-timestamp fields, skipping everything
 * the decoder does not use. Stores only sei_pic_struct.
 * NOTE(review): some lines (returns, closing braces) are elided in this
 * listing; code kept verbatim. */
6671 static int decode_picture_timing(H264Context *h){
6672 MpegEncContext * const s = &h->s;
6673 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
/* field widths come from the SPS HRD parameters parsed earlier */
6674 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6675 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6677 if(h->sps.pic_struct_present_flag){
6678 unsigned int i, num_clock_ts;
6679 h->sei_pic_struct = get_bits(&s->gb, 4);
6681 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6684 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6686 for (i = 0 ; i < num_clock_ts ; i++){
6687 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6688 unsigned int full_timestamp_flag;
6689 skip_bits(&s->gb, 2); /* ct_type */
6690 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6691 skip_bits(&s->gb, 5); /* counting_type */
6692 full_timestamp_flag = get_bits(&s->gb, 1);
6693 skip_bits(&s->gb, 1); /* discontinuity_flag */
6694 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6695 skip_bits(&s->gb, 8); /* n_frames */
6696 if(full_timestamp_flag){
6697 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6698 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6699 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* partial timestamp: each component is gated by its own flag */
6701 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6702 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6703 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6704 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6705 if(get_bits(&s->gb, 1)) /* hours_flag */
6706 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6710 if(h->sps.time_offset_length > 0)
6711 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered user-data SEI payload of `size` bytes.
 * Copies up to sizeof(user_data)-1 bytes, scans the text (past the 16-byte
 * UUID) for an x264 version banner to record h->x264_build (used elsewhere
 * for bug workarounds), then skips any remaining payload bytes.
 * NOTE(review): some lines (NUL-termination, return) are elided in this
 * listing; code kept verbatim. */
6718 static int decode_unregistered_user_data(H264Context *h, int size){
6719 MpegEncContext * const s = &h->s;
/* 16 bytes of UUID followed by up to 256 bytes of free-form text */
6720 uint8_t user_data[16+256];
6726 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6727 user_data[i]= get_bits(&s->gb, 8);
6731 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6732 if(e==1 && build>=0)
6733 h->x264_build= build;
6735 if(s->avctx->debug & FF_DEBUG_BUGS)
6736 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes that did not fit into user_data */
6739 skip_bits(&s->gb, 8);
/* Parse all SEI messages in the current NAL unit.
 * Each message is a (type, size) pair where both values use the "0xFF means
 * add 255 and continue" escape coding; dispatches the payload types the
 * decoder cares about and skips the rest.
 * NOTE(review): the switch skeleton and some returns are elided in this
 * listing; code kept verbatim. */
6744 static int decode_sei(H264Context *h){
6745 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (minimum type+size pair) */
6747 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate type: bytes of 255 extend the value */
6752 type+= show_bits(&s->gb, 8);
6753 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payload size the same way */
6757 size+= show_bits(&s->gb, 8);
6758 }while(get_bits(&s->gb, 8) == 255);
6761 case 1: // Picture timing SEI
6762 if(decode_picture_timing(h) < 0)
6766 if(decode_unregistered_user_data(h, size) < 0)
/* unknown payload type: skip it wholesale */
6770 skip_bits(&s->gb, 8*size);
6773 //FIXME check bits here
6774 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters into the SPS
 * (H.264 spec E.1.2). Only the delay-field lengths and time_offset_length
 * are kept; per-CPB rate/size values are read and discarded.
 * NOTE(review): declarations and returns are elided in this listing;
 * code kept verbatim. */
6780 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6781 MpegEncContext * const s = &h->s;
6783 cpb_count = get_ue_golomb(&s->gb) + 1;
/* spec limit: cpb_cnt_minus1 is 0..31 */
6785 if(cpb_count > 32U){
6786 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6790 get_bits(&s->gb, 4); /* bit_rate_scale */
6791 get_bits(&s->gb, 4); /* cpb_size_scale */
6792 for(i=0; i<cpb_count; i++){
6793 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6794 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6795 get_bits1(&s->gb); /* cbr_flag */
6797 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* these lengths are later used by decode_picture_timing() */
6798 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6799 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6800 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI (video usability information) from the SPS (H.264 spec E.1.1):
 * sample aspect ratio, video signal description (read and discarded),
 * timing info, NAL/VCL HRD parameters, pic_struct flag and bitstream
 * restrictions (num_reorder_frames is the only restriction kept).
 * NOTE(review): some lines (returns, closing braces) are elided in this
 * listing; code kept verbatim. */
6804 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6805 MpegEncContext * const s = &h->s;
6806 int aspect_ratio_info_present_flag;
6807 unsigned int aspect_ratio_idc;
6809 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6811 if( aspect_ratio_info_present_flag ) {
6812 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
6813 if( aspect_ratio_idc == EXTENDED_SAR ) {
6814 sps->sar.num= get_bits(&s->gb, 16);
6815 sps->sar.den= get_bits(&s->gb, 16);
6816 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6817 sps->sar= pixel_aspect[aspect_ratio_idc];
6819 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6826 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6828 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6829 get_bits1(&s->gb); /* overscan_appropriate_flag */
6832 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6833 get_bits(&s->gb, 3); /* video_format */
6834 get_bits1(&s->gb); /* video_full_range_flag */
6835 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6836 get_bits(&s->gb, 8); /* colour_primaries */
6837 get_bits(&s->gb, 8); /* transfer_characteristics */
6838 get_bits(&s->gb, 8); /* matrix_coefficients */
6842 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6843 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6844 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6847 sps->timing_info_present_flag = get_bits1(&s->gb);
6848 if(sps->timing_info_present_flag){
6849 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6850 sps->time_scale = get_bits_long(&s->gb, 32);
6851 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6854 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6855 if(sps->nal_hrd_parameters_present_flag)
6856 if(decode_hrd_parameters(h, sps) < 0)
6858 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6859 if(sps->vcl_hrd_parameters_present_flag)
6860 if(decode_hrd_parameters(h, sps) < 0)
6862 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6863 get_bits1(&s->gb); /* low_delay_hrd_flag */
6864 sps->pic_struct_present_flag = get_bits1(&s->gb);
6866 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6867 if(sps->bitstream_restriction_flag){
6868 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6869 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6870 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6871 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6872 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6873 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6874 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity bound; the commented-out clause would also validate max_dec_frame_buffering */
6876 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6877 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one scaling list of `size` (16 or 64) entries into `factors`.
 * If the present-flag is 0 the predicted `fallback_list` is copied; if the
 * first delta makes `next` zero the JVT default `jvt_list` is used; otherwise
 * values are delta-coded in zigzag order, repeating `last` once deltas stop.
 * NOTE(review): the else/break/closing-brace lines are elided in this
 * listing; code kept verbatim. */
6885 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6886 const uint8_t *jvt_list, const uint8_t *fallback_list){
6887 MpegEncContext * const s = &h->s;
6888 int i, last = 8, next = 8;
/* 4x4 lists use zigzag_scan, 8x8 lists use zigzag_scan8x8 */
6889 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6890 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6891 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6893 for(i=0;i<size;i++){
6895 next = (last + get_se_golomb(&s->gb)) & 0xff;
6896 if(!i && !next){ /* matrix not written, we use the preset one */
6897 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6900 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS (is_sps=1) or PPS.
 * Fallback rules per the spec: a PPS falls back to the SPS matrices when the
 * SPS transmitted any, otherwise to the flat/JVT defaults; within a set,
 * each list falls back to the previous list of the same intra/inter class.
 * NOTE(review): closing braces are elided in this listing; code kept verbatim. */
6904 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6905 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6906 MpegEncContext * const s = &h->s;
6907 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6908 const uint8_t *fallback[4] = {
6909 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6910 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6911 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6912 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
6914 if(get_bits1(&s->gb)){
6915 sps->scaling_matrix_present |= is_sps;
6916 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6917 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6918 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6919 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6920 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6921 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* the two 8x8 lists exist only when 8x8 transform is possible */
6922 if(is_sps || pps->transform_8x8_mode){
6923 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6924 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
 * Reads profile/level/constraint flags, high-profile extensions (chroma
 * format, bit depths, transform bypass, scaling matrices), POC configuration,
 * reference-frame count, picture dimensions, AFF flags, cropping and VUI.
 * Validates ranges and logs errors on out-of-range values.
 * NOTE(review): some lines (error returns, closing braces, #endif) are elided
 * in this listing; code kept verbatim. */
6929 static inline int decode_seq_parameter_set(H264Context *h){
6930 MpegEncContext * const s = &h->s;
6931 int profile_idc, level_idc;
6932 unsigned int sps_id;
6936 profile_idc= get_bits(&s->gb, 8);
6937 get_bits1(&s->gb); //constraint_set0_flag
6938 get_bits1(&s->gb); //constraint_set1_flag
6939 get_bits1(&s->gb); //constraint_set2_flag
6940 get_bits1(&s->gb); //constraint_set3_flag
6941 get_bits(&s->gb, 4); // reserved
6942 level_idc= get_bits(&s->gb, 8);
6943 sps_id= get_ue_golomb(&s->gb);
6945 if(sps_id >= MAX_SPS_COUNT) {
6946 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
6949 sps= av_mallocz(sizeof(SPS));
6953 sps->profile_idc= profile_idc;
6954 sps->level_idc= level_idc;
/* default: flat (all-16) scaling matrices until/unless the SPS sends its own */
6956 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
6957 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
6958 sps->scaling_matrix_present = 0;
6960 if(sps->profile_idc >= 100){ //high profile
6961 sps->chroma_format_idc= get_ue_golomb(&s->gb);
6962 if(sps->chroma_format_idc == 3)
6963 get_bits1(&s->gb); //residual_color_transform_flag
6964 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
6965 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
6966 sps->transform_bypass = get_bits1(&s->gb);
6967 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are always 4:2:0 */
6969 sps->chroma_format_idc= 1;
6972 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
6973 sps->poc_type= get_ue_golomb(&s->gb);
6975 if(sps->poc_type == 0){ //FIXME #define
6976 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
6977 } else if(sps->poc_type == 1){//FIXME #define
6978 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
6979 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
6980 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
6981 sps->poc_cycle_length = get_ue_golomb(&s->gb);
6983 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
6984 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
6988 for(i=0; i<sps->poc_cycle_length; i++)
6989 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
6990 }else if(sps->poc_type != 2){
6991 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
6995 sps->ref_frame_count= get_ue_golomb(&s->gb);
6996 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
6997 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7000 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7001 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7002 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_width / 16*mb_height against integer overflow before checking */
7003 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7004 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7005 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7009 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7010 if(!sps->frame_mbs_only_flag)
7011 sps->mb_aff= get_bits1(&s->gb);
7015 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7017 #ifndef ALLOW_INTERLACE
7019 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7021 sps->crop= get_bits1(&s->gb);
7023 sps->crop_left = get_ue_golomb(&s->gb);
7024 sps->crop_right = get_ue_golomb(&s->gb);
7025 sps->crop_top = get_ue_golomb(&s->gb);
7026 sps->crop_bottom= get_ue_golomb(&s->gb);
7027 if(sps->crop_left || sps->crop_top){
7028 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7030 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7031 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7037 sps->crop_bottom= 0;
7040 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7041 if( sps->vui_parameters_present_flag )
7042 decode_vui_parameters(h, sps);
7044 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7045 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7046 sps_id, sps->profile_idc, sps->level_idc,
7048 sps->ref_frame_count,
7049 sps->mb_width, sps->mb_height,
7050 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7051 sps->direct_8x8_inference_flag ? "8B8" : "",
7052 sps->crop_left, sps->crop_right,
7053 sps->crop_top, sps->crop_bottom,
7054 sps->vui_parameters_present_flag ? "VUI" : "",
7055 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previous SPS with the same id */
7058 av_free(h->sps_buffers[sps_id]);
7059 h->sps_buffers[sps_id]= sps;
/* Fill pps->chroma_qp_table[t] (t = 0/1 for the two chroma QP offsets):
 * maps each luma QP 0..51 to its chroma QP via the chroma_qp[] table after
 * applying `index` (the chroma_qp_index_offset), clipped to 0..51.
 * NOTE(review): the return-type line and braces are elided in this listing. */
7067 build_qp_table(PPS *pps, int t, int index)
7070 for(i = 0; i < 52; i++)
7071 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL and store it in h->pps_buffers[pps_id].
 * Validates pps_id/sps_id, rejects FMO (slice_group_count > 1 is logged as
 * unsupported; the spec syntax is kept as a reference comment), reads
 * reference counts, weighting, QP offsets and flags, inherits scaling
 * matrices from the SPS, and builds the two chroma QP lookup tables.
 * NOTE(review): some lines (error returns, closing braces, parts of the FMO
 * reference comment) are elided in this listing; code kept verbatim. */
7074 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7075 MpegEncContext * const s = &h->s;
7076 unsigned int pps_id= get_ue_golomb(&s->gb);
7079 if(pps_id >= MAX_PPS_COUNT) {
7080 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7084 pps= av_mallocz(sizeof(PPS));
7087 pps->sps_id= get_ue_golomb(&s->gb);
/* the referenced SPS must already have been decoded */
7088 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7089 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7093 pps->cabac= get_bits1(&s->gb);
7094 pps->pic_order_present= get_bits1(&s->gb);
7095 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7096 if(pps->slice_group_count > 1 ){
7097 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7098 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* the switch below only documents the unparsed FMO syntax from the spec */
7099 switch(pps->mb_slice_group_map_type){
7102 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7103 | run_length[ i ] |1 |ue(v) |
7108 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7110 | top_left_mb[ i ] |1 |ue(v) |
7111 | bottom_right_mb[ i ] |1 |ue(v) |
7119 | slice_group_change_direction_flag |1 |u(1) |
7120 | slice_group_change_rate_minus1 |1 |ue(v) |
7125 | slice_group_id_cnt_minus1 |1 |ue(v) |
7126 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7128 | slice_group_id[ i ] |1 |u(v) |
7133 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7134 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7135 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7136 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7140 pps->weighted_pred= get_bits1(&s->gb);
7141 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7142 pps->init_qp= get_se_golomb(&s->gb) + 26;
7143 pps->init_qs= get_se_golomb(&s->gb) + 26;
7144 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7145 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7146 pps->constrained_intra_pred= get_bits1(&s->gb);
7147 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7149 pps->transform_8x8_mode= 0;
7150 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7151 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7152 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* optional trailer: more_rbsp_data() approximated by the bit position */
7154 if(get_bits_count(&s->gb) < bit_length){
7155 pps->transform_8x8_mode= get_bits1(&s->gb);
7156 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7157 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7159 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7162 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7163 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
/* NOTE(review): this writes h->pps (the active PPS copy), not the freshly
 * parsed `pps` — looks intentional in this revision but worth confirming. */
7164 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7165 h->pps.chroma_qp_diff= 1;
7167 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7168 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7169 pps_id, pps->sps_id,
7170 pps->cabac ? "CABAC" : "CAVLC",
7171 pps->slice_group_count,
7172 pps->ref_count[0], pps->ref_count[1],
7173 pps->weighted_pred ? "weighted" : "",
7174 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7175 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7176 pps->constrained_intra_pred ? "CONSTR" : "",
7177 pps->redundant_pic_cnt_present ? "REDU" : "",
7178 pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previous PPS with the same id */
7182 av_free(h->pps_buffers[pps_id]);
7183 h->pps_buffers[pps_id]= pps;
7191 * Call decode_slice() for each context.
7193 * @param h h264 master context
7194 * @param context_count number of contexts to execute
/* Run decode_slice() on `context_count` slice contexts: directly for a single
 * context, via avctx->execute() for multi-threaded slice decoding. Afterwards
 * the last thread context's position/state and the per-thread error counts
 * are pulled back into the master context.
 * NOTE(review): some lines (closing braces, else) are elided in this listing;
 * code kept verbatim. */
7196 static void execute_decode_slices(H264Context *h, int context_count){
7197 MpegEncContext * const s = &h->s;
7198 AVCodecContext * const avctx= s->avctx;
7202 if(context_count == 1) {
7203 decode_slice(avctx, &h);
/* multi-context: propagate error settings to each worker before execute() */
7205 for(i = 1; i < context_count; i++) {
7206 hx = h->thread_context[i];
7207 hx->s.error_recognition = avctx->error_recognition;
7208 hx->s.error_count = 0;
7211 avctx->execute(avctx, (void *)decode_slice,
7212 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7214 /* pull back stuff from slices to master context */
7215 hx = h->thread_context[context_count - 1];
7216 s->mb_x = hx->s.mb_x;
7217 s->mb_y = hx->s.mb_y;
7218 s->dropable = hx->s.dropable;
7219 s->picture_structure = hx->s.picture_structure;
7220 for(i = 1; i < context_count; i++)
7221 h->s.error_count += h->thread_context[i]->s.error_count;
7226 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
/* Split the input buffer into NAL units and dispatch each one to the matching
 * parser: slices (and IDR slices), DPA/DPB/DPC data partitions, SEI, SPS and
 * PPS.  Slice contexts are batched and flushed through
 * execute_decode_slices().
 * NOTE(review): this excerpt is non-contiguous -- the embedded source line
 * numbers show gaps, so several declarations, braces, `case` labels and the
 * return statement of the original function are missing here. */
7227 MpegEncContext * const s = &h->s;
7228 AVCodecContext * const avctx= s->avctx;
7230 H264Context *hx; ///< thread context
7231 int context_count = 0;
7233 h->max_contexts = avctx->thread_count;
/* debug hex dump of the first input bytes (the guard around this loop falls
 * in a gap of this excerpt) */
7236 for(i=0; i<50; i++){
7237 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* unless decoding in CHUNKS mode, each call starts a fresh picture */
7240 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7241 h->current_slice = 0;
7242 if (!s->first_field)
7243 s->current_picture_ptr= NULL;
7255 if(buf_index >= buf_size) break;
/* AVC (length-prefixed) mode: read the big-endian NAL size prefix */
7257 for(i = 0; i < h->nal_length_size; i++)
7258 nalsize = (nalsize << 8) | buf[buf_index++];
7259 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7264 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
/* Annex-B mode: scan for the 00 00 01 start code prefix */
7269 // start code prefix search
7270 for(; buf_index + 3 < buf_size; buf_index++){
7271 // This should always succeed in the first iteration.
7272 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7276 if(buf_index+3 >= buf_size) break;
/* unescape the NAL into the context that will decode it */
7281 hx = h->thread_context[context_count];
7283 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7284 if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes left over after unescaping */
7287 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7289 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7291 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7292 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7295 if (h->is_avc && (nalsize != consumed)){
7296 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7300 buf_index += consumed;
/* drop non-reference NALs when the caller asked to skip them */
7302 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7303 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7308 switch(hx->nal_unit_type){
/* IDR slice: every slice of an IDR picture must itself be IDR */
7310 if (h->nal_unit_type != NAL_IDR_SLICE) {
7311 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7314 idr(h); //FIXME ensure we don't lose some frames if there is reordering
/* regular slice: no data partitioning, inter data read from s.gb */
7316 init_get_bits(&hx->s.gb, ptr, bit_length);
7318 hx->inter_gb_ptr= &hx->s.gb;
7319 hx->s.data_partitioning = 0;
7321 if((err = decode_slice_header(hx, h)))
7324 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* queue the slice for decoding unless a skip/discard setting applies */
7325 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7326 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7327 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7328 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7329 && avctx->skip_frame < AVDISCARD_ALL)
/* DPA: slice header + partition A; switches data partitioning on */
7333 init_get_bits(&hx->s.gb, ptr, bit_length);
7335 hx->inter_gb_ptr= NULL;
7336 hx->s.data_partitioning = 1;
7338 err = decode_slice_header(hx, h);
/* DPB: intra-coded residual partition */
7341 init_get_bits(&hx->intra_gb, ptr, bit_length);
7342 hx->intra_gb_ptr= &hx->intra_gb;
/* DPC: inter-coded residual partition; once all partitions arrived the
 * slice can be queued for decoding */
7345 init_get_bits(&hx->inter_gb, ptr, bit_length);
7346 hx->inter_gb_ptr= &hx->inter_gb;
7348 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7349 && s->context_initialized
7351 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7352 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7353 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7354 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message */
7358 init_get_bits(&s->gb, ptr, bit_length);
/* sequence parameter set */
7362 init_get_bits(&s->gb, ptr, bit_length);
7363 decode_seq_parameter_set(h);
7365 if(s->flags& CODEC_FLAG_LOW_DELAY)
7368 if(avctx->has_b_frames < 2)
7369 avctx->has_b_frames= !s->low_delay;
/* picture parameter set */
7372 init_get_bits(&s->gb, ptr, bit_length);
7374 decode_picture_parameter_set(h, bit_length);
/* NAL types that are recognized but intentionally ignored */
7378 case NAL_END_SEQUENCE:
7379 case NAL_END_STREAM:
7380 case NAL_FILLER_DATA:
7382 case NAL_AUXILIARY_SLICE:
7385 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* flush the accumulated slice contexts once all of them are filled */
7388 if(context_count == h->max_contexts) {
7389 execute_decode_slices(h, context_count);
7394 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7396 /* Slice could not be decoded in parallel mode, copy down
7397 * NAL unit stuff to context 0 and restart. Note that
7398 * rbsp_buffer is not transferred, but since we no longer
7399 * run in parallel mode this should not be an issue. */
7400 h->nal_unit_type = hx->nal_unit_type;
7401 h->nal_ref_idc = hx->nal_ref_idc;
/* decode any remaining, not yet executed slice contexts */
7407 execute_decode_slices(h, context_count);
7412 * Return the number of bytes consumed for building the current frame.
7414 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
/* Clamp the parser position to a sane consumed-byte count for the current
 * frame: never report 0 (would stall the caller's loop) and never report
 * more than the buffer minus a small safety margin.
 * NOTE(review): the return statement of the original function falls in a
 * gap of this excerpt. */
7415 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7416 if(pos+10>buf_size) pos=buf_size; // oops ;)
7421 static int decode_frame(AVCodecContext *avctx,
7422 void *data, int *data_size,
7423 const uint8_t *buf, int buf_size)
/* Top-level decode callback: parses extradata (avcC or Annex-B) on first
 * use, decodes all NAL units of the packet, then reorders and outputs one
 * frame from the delayed-picture buffer.  Returns the number of consumed
 * bytes via get_consumed_bytes().
 * NOTE(review): this excerpt is non-contiguous -- the embedded source line
 * numbers show gaps, so several statements, braces and `else` branches of
 * the original function are missing here. */
7425 H264Context *h = avctx->priv_data;
7426 MpegEncContext *s = &h->s;
7427 AVFrame *pict = data;
7430 s->flags= avctx->flags;
7431 s->flags2= avctx->flags2;
7433 /* end of stream, output what is still in the buffers */
7434 if (buf_size == 0) {
7438 //FIXME factorize this with the output code below
/* pick the delayed picture with the smallest POC for output */
7439 out = h->delayed_pic[0];
7441 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7442 if(h->delayed_pic[i]->poc < out->poc){
7443 out = h->delayed_pic[i];
/* compact the delayed-picture list after removing the output picture */
7447 for(i=out_idx; h->delayed_pic[i]; i++)
7448 h->delayed_pic[i] = h->delayed_pic[i+1];
7451 *data_size = sizeof(AVFrame);
7452 *pict= *(AVFrame*)out;
/* first call with AVC (ISO/MP4) input: parse the avcC extradata box */
7458 if(h->is_avc && !h->got_avcC) {
7459 int i, cnt, nalsize;
7460 unsigned char *p = avctx->extradata;
7461 if(avctx->extradata_size < 7) {
7462 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7466 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7469 /* sps and pps in the avcC always have length coded with 2 bytes,
7470 so put a fake nal_length_size = 2 while parsing them */
7471 h->nal_length_size = 2;
7472 // Decode sps from avcC
7473 cnt = *(p+5) & 0x1f; // Number of sps
7475 for (i = 0; i < cnt; i++) {
7476 nalsize = AV_RB16(p) + 2;
7477 if(decode_nal_units(h, p, nalsize) < 0) {
7478 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7483 // Decode pps from avcC
7484 cnt = *(p++); // Number of pps
7485 for (i = 0; i < cnt; i++) {
7486 nalsize = AV_RB16(p) + 2;
7487 if(decode_nal_units(h, p, nalsize) != nalsize) {
7488 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7493 // Now store the right nal length size, that will be used to parse all other nals
7494 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7495 // Do not reparse avcC
/* Annex-B extradata (e.g. from a raw stream): decode it as NAL units once */
7499 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7500 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
/* decode the actual packet payload */
7505 buf_index=decode_nal_units(h, buf, buf_size);
7509 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7510 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7511 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* picture finished (or non-CHUNKS mode): run output/reordering logic */
7515 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7516 Picture *out = s->current_picture_ptr;
7517 Picture *cur = s->current_picture_ptr;
7518 int i, pics, cross_idr, out_of_order, out_idx;
7522 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7523 s->current_picture_ptr->pict_type= s->pict_type;
/* apply memory-management control operations and roll POC state forward */
7526 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7527 h->prev_poc_msb= h->poc_msb;
7528 h->prev_poc_lsb= h->poc_lsb;
7530 h->prev_frame_num_offset= h->frame_num_offset;
7531 h->prev_frame_num= h->frame_num;
7534 * FIXME: Error handling code does not seem to support interlaced
7535 * when slices span multiple rows
7536 * The ff_er_add_slice calls don't work right for bottom
7537 * fields; they cause massive erroneous error concealing
7538 * Error marking covers both fields (top and bottom).
7539 * This causes a mismatched s->error_count
7540 * and a bad error table. Further, the error count goes to
7541 * INT_MAX when called for bottom field, because mb_y is
7542 * past end by one (callers fault) and resync_mb_y != 0
7543 * causes problems for the first MB line, too.
7550 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7551 /* Wait for second field. */
7555 cur->repeat_pict = 0;
7557 /* Signal interlacing information externally. */
7558 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7559 if(h->sps.pic_struct_present_flag){
7560 switch (h->sei_pic_struct)
7562 case SEI_PIC_STRUCT_FRAME:
7563 cur->interlaced_frame = 0;
7565 case SEI_PIC_STRUCT_TOP_FIELD:
7566 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7567 case SEI_PIC_STRUCT_TOP_BOTTOM:
7568 case SEI_PIC_STRUCT_BOTTOM_TOP:
7569 cur->interlaced_frame = 1;
7571 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7572 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7573 // Signal the possibility of telecined film externally (pic_struct 5,6)
7574 // From these hints, let the applications decide if they apply deinterlacing.
7575 cur->repeat_pict = 1;
7576 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7578 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7579 // Force progressive here, as doubling interlaced frame is a bad idea.
7580 cur->interlaced_frame = 0;
7581 cur->repeat_pict = 2;
7583 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7584 cur->interlaced_frame = 0;
7585 cur->repeat_pict = 4;
7589 /* Derive interlacing flag from used decoding process. */
7590 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7593 if (cur->field_poc[0] != cur->field_poc[1]){
7594 /* Derive top_field_first from field pocs. */
7595 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7597 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7598 /* Use picture timing SEI information. Even if it is information about a past frame, better than nothing. */
7599 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7600 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7601 cur->top_field_first = 1;
7603 cur->top_field_first = 0;
7605 /* Most likely progressive */
7606 cur->top_field_first = 0;
7610 //FIXME do something with unavailable reference frames
7612 /* Sort B-frames into display order */
7614 if(h->sps.bitstream_restriction_flag
7615 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7616 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* without a bitstream restriction we must assume the worst-case delay */
7620 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7621 && !h->sps.bitstream_restriction_flag){
7622 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* append the current picture to the delayed-output buffer */
7627 while(h->delayed_pic[pics]) pics++;
7629 assert(pics <= MAX_DELAYED_PIC_COUNT);
7631 h->delayed_pic[pics++] = cur;
7632 if(cur->reference == 0)
7633 cur->reference = DELAYED_PIC_REF;
/* select the delayed picture with the smallest POC as output candidate */
7635 out = h->delayed_pic[0];
7637 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7638 if(h->delayed_pic[i]->poc < out->poc){
7639 out = h->delayed_pic[i];
7642 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7644 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* grow the assumed reorder delay when an out-of-order output is detected */
7646 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7648 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7650 ((!cross_idr && out->poc > h->outputed_poc + 2)
7651 || cur->pict_type == FF_B_TYPE)))
7654 s->avctx->has_b_frames++;
/* emit the candidate once the delay buffer is full (or forced) */
7657 if(out_of_order || pics > s->avctx->has_b_frames){
7658 out->reference &= ~DELAYED_PIC_REF;
7659 for(i=out_idx; h->delayed_pic[i]; i++)
7660 h->delayed_pic[i] = h->delayed_pic[i+1];
7662 if(!out_of_order && pics > s->avctx->has_b_frames){
7663 *data_size = sizeof(AVFrame);
7665 h->outputed_poc = out->poc;
7666 *pict= *(AVFrame*)out;
7668 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7673 assert(pict->data[0] || !*data_size);
7674 ff_print_debug_info(s, pict);
7675 //printf("out %d\n", (int)pict->data[0]);
7678 /* Return the Picture timestamp as the frame number */
7679 /* we subtract 1 because it is added on utils.c */
7680 avctx->frame_number = s->picture_number - 1;
7682 return get_consumed_bytes(s, buf_index, buf_size);
7685 static inline void fill_mb_avail(H264Context *h){
/* Fill h->mb_avail[0..5] with the availability of the current macroblock's
 * neighbours: a neighbour is available when it lies inside the picture and
 * belongs to the same slice (same slice_table entry).
 * NOTE(review): the guard around the top-row entries (first MB row) falls
 * in a gap of this excerpt. */
7686 MpegEncContext * const s = &h->s;
7687 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* [0]=top-left, [1]=top, [2]=top-right neighbours */
7690 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7691 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7692 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* [3]=left neighbour; [4]/[5] are constant placeholders */
7698 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7699 h->mb_avail[4]= 1; //FIXME move out
7700 h->mb_avail[5]= 0; //FIXME move out
7708 #define SIZE (COUNT*40)
/* Self-test harness (built under #ifdef TEST in the original file): checks
 * the exp-Golomb coders, the 4x4 (I)DCT round trip, the quantizer, and the
 * NAL escape/unescape round trip.
 * NOTE(review): the enclosing main() definition line and many statements
 * fall in gaps of this non-contiguous excerpt. */
7714 // int int_temp[10000];
7716 AVCodecContext avctx;
7718 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-Golomb: write values 0..COUNT-1, read them back --- */
7720 init_put_bits(&pb, temp, SIZE);
7721 printf("testing unsigned exp golomb\n");
7722 for(i=0; i<COUNT; i++){
7724 set_ue_golomb(&pb, i);
7725 STOP_TIMER("set_ue_golomb");
7727 flush_put_bits(&pb);
7729 init_get_bits(&gb, temp, 8*SIZE);
7730 for(i=0; i<COUNT; i++){
7733 s= show_bits(&gb, 24);
7736 j= get_ue_golomb(&gb);
7738 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7741 STOP_TIMER("get_ue_golomb");
/* --- signed exp-Golomb: same round trip over a symmetric value range --- */
7745 init_put_bits(&pb, temp, SIZE);
7746 printf("testing signed exp golomb\n");
7747 for(i=0; i<COUNT; i++){
7749 set_se_golomb(&pb, i - COUNT/2);
7750 STOP_TIMER("set_se_golomb");
7752 flush_put_bits(&pb);
7754 init_get_bits(&gb, temp, 8*SIZE);
7755 for(i=0; i<COUNT; i++){
7758 s= show_bits(&gb, 24);
7761 j= get_se_golomb(&gb);
7762 if(j != i - COUNT/2){
7763 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7766 STOP_TIMER("get_se_golomb");
/* --- 4x4 DCT/IDCT round trip over random blocks, measuring the error --- */
7770 printf("testing 4x4 (I)DCT\n");
7773 uint8_t src[16], ref[16];
7774 uint64_t error= 0, max_error=0;
7776 for(i=0; i<COUNT; i++){
7778 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7779 for(j=0; j<16; j++){
7780 ref[j]= random()%255;
7781 src[j]= random()%255;
7784 h264_diff_dct_c(block, src, ref, 4);
/* scale coefficients (approximate inverse of the DCT's gain per row/col) */
7787 for(j=0; j<16; j++){
7788 // printf("%d ", block[j]);
7789 block[j]= block[j]*4;
7790 if(j&1) block[j]= (block[j]*4 + 2)/5;
7791 if(j&4) block[j]= (block[j]*4 + 2)/5;
7795 s->dsp.h264_idct_add(ref, block, 4);
7796 /* for(j=0; j<16; j++){
7797 printf("%d ", ref[j]);
/* accumulate absolute reconstruction error */
7801 for(j=0; j<16; j++){
7802 int diff= FFABS(src[j] - ref[j]);
7805 max_error= FFMAX(max_error, diff);
7808 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sanity check over all 52 QP values --- */
7809 printf("testing quantizer\n");
7810 for(qp=0; qp<52; qp++){
7812 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round trip on a random bitstream --- */
7815 printf("Testing NAL layer\n");
7817 uint8_t bitstream[COUNT];
7818 uint8_t nal[COUNT*2];
7820 memset(&h, 0, sizeof(H264Context));
7822 for(i=0; i<COUNT; i++){
/* fill with non-zero bytes, then sprinkle in zeros to exercise escaping */
7830 for(j=0; j<COUNT; j++){
7831 bitstream[j]= (random() % 255) + 1;
7834 for(j=0; j<zeros; j++){
7835 int pos= random() % COUNT;
7836 while(bitstream[pos] == 0){
7845 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7847 printf("encoding failed\n");
7851 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
/* decoded data must match the original in length, consumption and content */
7855 if(out_length != COUNT){
7856 printf("incorrect length %d %d\n", out_length, COUNT);
7860 if(consumed != nal_length){
7861 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7865 if(memcmp(bitstream, out, COUNT)){
7866 printf("mismatch\n");
7872 printf("Testing RBSP\n");
7880 static av_cold int decode_end(AVCodecContext *avctx)
/* Codec close callback: frees the per-context RBSP unescape buffers, the
 * decoder tables, and every stored SPS/PPS parameter set.
 * NOTE(review): the closing statements of the original function (e.g. the
 * return) fall in a gap of this excerpt. */
7882 H264Context *h = avctx->priv_data;
7883 MpegEncContext *s = &h->s;
7886 av_freep(&h->rbsp_buffer[0]);
7887 av_freep(&h->rbsp_buffer[1]);
7888 free_tables(h); //FIXME cleanup init stuff perhaps
/* release all cached parameter sets; av_freep() nulls the slots */
7890 for(i = 0; i < MAX_SPS_COUNT; i++)
7891 av_freep(h->sps_buffers + i);
7893 for(i = 0; i < MAX_PPS_COUNT; i++)
7894 av_freep(h->pps_buffers + i);
7898 // memset(h, 0, sizeof(H264Context));
7904 AVCodec h264_decoder = {
7908 sizeof(H264Context),
7913 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7915 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),