/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
36 #include "vdpau_internal.h"
40 #include "x86/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs, one per nC context (0-1, 2-3, 4-7, >=8) */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token VLC for the 2x2 chroma DC blocks */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* total_zeros VLCs, one per total_coeff value 1..15 */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros VLCs for chroma DC, one per total_coeff value 1..3 */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* run_before VLCs for zeros_left 1..6 */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before table for zeros_left > 6;
 * NOTE(review): the matching 'static VLC run7_vlc;' declaration is missing
 * from this excerpt. */
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;
/* forward declarations — definitions appear later in the file */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
/* deblocking filter for one macroblock: exact and fast variants */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/* presumably unreferences a long-term ref picture — definition not visible here */
static Picture * remove_long(H264Context *h, int i, int ref_mask);
82 static av_always_inline uint32_t pack16to32(int a, int b){
83 #ifdef WORDS_BIGENDIAN
84 return (b&0xFFFF) + (a<<16);
86 return (a&0xFFFF) + (b<<16);
/* rem6[qp] == qp % 6 for the full QP range 0..51 (avoids a runtime modulo).
 * (Fix: restored the closing brace of the initializer.) */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* div6[qp] == qp / 6 for the full QP range 0..51 (avoids a runtime division).
 * (Fix: restored the closing brace of the initializer.) */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
98 static const int left_block_options[4][8]={
#define LEVEL_TAB_BITS 8
/* Precomputed CAVLC level-decoding table indexed by [suffix_length][next
 * LEVEL_TAB_BITS bitstream bits]; the [2] entries presumably hold the decoded
 * level and the bit count — filled elsewhere, confirm against the init code. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches (intra prediction modes,
 * non-zero counts, cbp, mv/ref caches, mvd, direct flags) from the
 * frame-wide tables, resolving the PAFF/MBAFF neighbour addressing rules.
 *
 * NOTE(review): this excerpt is missing numerous original source lines
 * (else branches, closing braces, #if/#else/#endif); the visible statements
 * are kept verbatim, so brace balance below reflects the excerpt, not the
 * complete function — restore from upstream before compiling.
 *
 * @param mb_type     mb_type of the current macroblock
 * @param for_deblock nonzero when the caches are filled for the loop filter
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;  // >=0 selects a special topleft partition (MBAFF corner case below)

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* default (non-MBAFF / same-parity) neighbour addresses */
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    /* MBAFF: re-derive neighbour addresses from the field flags of the MB pairs */
    const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy = pair_xy - s->mb_stride;
    const int topleft_pair_xy = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_field_flag = IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

    if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
        top_xy -= s->mb_stride;
    if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
        topleft_xy -= s->mb_stride;
    } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
        topleft_xy += s->mb_stride;
        // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
        topleft_partition = 0;
    if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
        topright_xy -= s->mb_stride;
    if (left_mb_field_flag != curr_mb_field_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_field_flag) {
            left_xy[1] += s->mb_stride;
            left_block = left_block_options[3];
            left_block= left_block_options[2 - bottom];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

    /* for_deblock path: any already-decoded MB (slice_table < 0xFFFF) counts */
    top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(MB_MBAFF && !IS_INTRA(mb_type)){
        for(list=0; list<h->list_count; list++){
            //These values where changed for ease of performing MC, we need to change them back
            //FIXME maybe we can make MC and loop filter use the same values or prevent
            //the MC code from changing ref_cache and rather use a temporary array.
            if(USES_LIST(mb_type,list)){
                int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;

    /* normal decode path: neighbours must belong to the current slice */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* sample-availability bitmasks for intra prediction */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        /* intra4x4 prediction mode cache: top row then left column */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!(top_type & type_mask))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            if(IS_INTRA4x4(left_type[i])){
                h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                if(!(left_type[i] & type_mask))
                h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    /* non-zero-count cache: top neighbour entries */
    h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
    h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
    h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    /* top neighbour unavailable: CABAC inter wants 0, otherwise 64 */
    h->non_zero_count_cache[4+8*0]=
    h->non_zero_count_cache[5+8*0]=
    h->non_zero_count_cache[6+8*0]=
    h->non_zero_count_cache[7+8*0]=
    h->non_zero_count_cache[1+8*0]=
    h->non_zero_count_cache[2+8*0]=
    h->non_zero_count_cache[1+8*3]=
    h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* non-zero-count cache: left neighbour entries, per field half */
    for (i=0; i<2; i++) {
        h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
        h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
        h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
        h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        h->non_zero_count_cache[3+8*1 + 2*8*i]=
        h->non_zero_count_cache[3+8*2 + 2*8*i]=
        h->non_zero_count_cache[0+8*1 + 8*i]=
        h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* CABAC: cache neighbour cbp */
    h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
    h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
    h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    /* motion vector / reference caches */
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
                }*/ /* NOTE(review): comment terminator restored; the 'continue;' that followed is missing from this excerpt */
                h->mv_cache_clean[list]= 0;

            /* top row of the mv/ref cache */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

            /* left column of the mv/ref cache */
            int cache_idx = scan8[0] - 1 + i*2*8;
            if(USES_LIST(left_type[i], list)){
                const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                *(uint32_t*)h->mv_cache [list][cache_idx ]=
                *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                h->ref_cache[list][cache_idx ]=
                h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* topleft/topright only needed for non-spatial-direct, non-MBAFF */
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            /* mark in-cache padding entries unavailable */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            /* B slices: direct_cache flags for CABAC direct-mode context */
            if(h->slice_type_nos == FF_B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            /* MBAFF frame<->field rescaling of the cached neighbour mvs/refs;
             * applied to all cache border entries via MAP_F2F */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

/* frame MB reading field neighbour: halve vertical mv, double ref */
#define MAP_F2F(idx, mb_type)\
    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] <<= 1;\
        h->mv_cache[list][idx][1] /= 2;\
        h->mvd_cache[list][idx][1] /= 2;\

/* field MB reading frame neighbour: double vertical mv, halve ref */
#define MAP_F2F(idx, mb_type)\
    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
        h->ref_cache[list][idx] >>= 1;\
        h->mv_cache[list][idx][1] <<= 1;\
        h->mvd_cache[list][idx][1] <<= 1;\

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
547 static inline void write_back_intra_pred_mode(H264Context *h){
548 const int mb_xy= h->mb_xy;
550 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
551 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
552 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
553 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
554 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
555 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
556 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * NOTE(review): this excerpt is missing the loop headers, error-return
 * branches and closing braces; visible statements kept verbatim.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* remap tables: mode -> replacement when the corresponding neighbour is
     * absent; -1 entries mark modes that cannot be repaired (error) */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    /* top row unavailable */
    if(!(h->top_samples_available&0x8000)){
        int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    /* one or more left blocks unavailable (per-8x8-row mask bits) */
    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        if(!(h->left_samples_available&mask[i])){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
            h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
} //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 * NOTE(review): mode-range check, returns and closing braces are missing
 * from this excerpt; visible statements kept verbatim.
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* remap tables: mode -> replacement when the neighbour is absent */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if((h->left_samples_available&0x8080) != 0x8080){
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 * gets the predicted intra4x4 prediction mode.
636 static inline int pred_intra_mode(H264Context *h, int n){
637 const int index8= scan8[n];
638 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640 const int min= FFMIN(left, top);
642 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
644 if(min<0) return DC_PRED;
648 static inline void write_back_non_zero_count(H264Context *h){
649 const int mb_xy= h->mb_xy;
651 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
652 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
653 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
654 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
655 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
656 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
657 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
659 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
660 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
661 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
663 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
664 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
665 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
669 * gets the predicted number of non-zero coefficients.
670 * @param n block index
672 static inline int pred_non_zero_count(H264Context *h, int n){
673 const int index8= scan8[n];
674 const int left= h->non_zero_count_cache[index8 - 1];
675 const int top = h->non_zero_count_cache[index8 - 8];
678 if(i<64) i= (i+1)>>1;
680 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the diagonal (top-right, with fallbacks) motion vector predictor C
 * and returns its reference index; handles the MBAFF frame/field neighbour
 * rescaling cases via SET_DIAG_MV.
 * NOTE(review): this excerpt is missing several lines (the FRAME_MBAFF
 * condition head, #undef SET_DIAG_MV, closing braces); visible statements
 * kept verbatim.
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    /* scan8[0]-2 is used as scratch space for the synthesized C predictor */
    *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
    *C = h->mv_cache[list][scan8[0]-2];

    && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
        int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
        if(IS_INTERLACED(mb_types[topright_xy])){
/* load the mv at 4x4 position (X4,Y4), rescale its vertical component with
 * MV_OP and its reference with REF_OP (frame<->field conversion) */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
            const int x4 = X4, y4 = Y4;\
            const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
            if(!USES_LIST(mb_type,list))\
                return LIST_NOT_USED;\
            mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
            h->mv_cache[list][scan8[0]-2][0] = mv[0];\
            h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
            return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

            SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

    if(topright_ref == PART_NOT_AVAILABLE
       && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
       && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
        && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
            SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
        && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
            // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
            SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    /* non-MBAFF path: top-right if available, otherwise top-left */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        tprintf(s->avctx, "topright MV not available\n");
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the single-match branches (top/left/diagonal) and several
 * closing braces are missing from this excerpt; visible statements kept
 * verbatim.
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    /* C = diagonal predictor (top-right with top-left fallback) */
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        /* more than one neighbour uses this ref -> component-wise median */
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
    }else if(top_ref==ref){
    /* only the left neighbour exists -> use A (spec special case) */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the n==0/n==4 partition selection and the early returns are
 * missing from this excerpt; visible statements kept verbatim. Falls back
 * to the generic pred_motion when the directional neighbour does not match.
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* top partition: predict from the top neighbour B */
    const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
    const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

    tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

    /* bottom partition: predict from the left neighbour A */
    const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
    const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

    tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    //RARE
    pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 * NOTE(review): the n==0/n==4 partition selection and the early returns are
 * missing from this excerpt; visible statements kept verbatim. Falls back
 * to the generic pred_motion when the directional neighbour does not match.
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    /* left partition: predict from the left neighbour A */
    const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
    const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    /* right partition: predict from the diagonal neighbour C */
    diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

    tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

    if(diagonal_ref == ref){

    //RARE
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the motion vector for a P-skip macroblock: zero MV when either
 * neighbour is unavailable or is a zero-MV ref-0 block, otherwise the
 * regular 16x16 median prediction.
 * NOTE(review): the '*mx=*my=0; return;' body of the early-out branch and
 * the closing braces are missing from this excerpt; visible statements kept
 * verbatim.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
       || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){

    pred_motion(h, 0, 4, 0, 0, mx, my);
889 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
890 int poc0 = h->ref_list[0][i].poc;
891 int td = av_clip(poc1 - poc0, -128, 127);
892 if(td == 0 || h->ref_list[0][i].long_ref){
895 int tb = av_clip(poc - poc0, -128, 127);
896 int tx = (16384 + (FFABS(td) >> 1)) / td;
897 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Precomputes the temporal-direct dist_scale_factor tables for the current
 * slice: the per-field table (indexed with the MBAFF i^field trick) and the
 * frame/field-level table.
 * NOTE(review): the FRAME_MBAFF condition and closing braces are missing
 * from this excerpt; visible statements kept verbatim.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    /* POC of the current picture (bottom-field POC when decoding a bottom field) */
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    for(field=0; field<2; field++){
        const int poc  = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the map from the co-located (list 1) picture's reference indexes to
 * the current slice's list-0 indexes, matching references by their POC-based
 * key (4*frame_num + reference parity bits).
 * NOTE(review): part of the POC adjustment chain, the 'break' and closing
 * braces are missing from this excerpt; visible statements kept verbatim.
 *
 * @param field    current field parity (only meaningful when mbafi)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero when filling the MBAFF field map (slots 16..)
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16                      : 0;
    int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                        map[list][old_ref] = cur_ref;
951 static inline void direct_ref_list_init(H264Context * const h){
952 MpegEncContext * const s = &h->s;
953 Picture * const ref1 = &h->ref_list[1][0];
954 Picture * const cur = s->current_picture_ptr;
956 int sidx= (s->picture_structure&1)^1;
957 int ref1sidx= (ref1->reference&1)^1;
959 for(list=0; list<2; list++){
960 cur->ref_count[sidx][list] = h->ref_count[list];
961 for(j=0; j<h->ref_count[list]; j++)
962 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
965 if(s->picture_structure == PICT_FRAME){
966 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
967 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
970 cur->mbaff= FRAME_MBAFF;
972 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975 for(list=0; list<2; list++){
976 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
977 for(field=0; field<2; field++)
978 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
982 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
983 MpegEncContext * const s = &h->s;
984 int b8_stride = h->b8_stride;
985 int b4_stride = h->b_stride;
986 int mb_xy = h->mb_xy;
988 const int16_t (*l1mv0)[2], (*l1mv1)[2];
989 const int8_t *l1ref0, *l1ref1;
990 const int is_b8x8 = IS_8X8(*mb_type);
991 unsigned int sub_mb_type;
994 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
996 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
997 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
998 int cur_poc = s->current_picture_ptr->poc;
999 int *col_poc = h->ref_list[1]->field_poc;
1000 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1003 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005 mb_xy += s->mb_stride*fieldoff;
1008 }else{ // AFL/AFR/FR/FL -> AFR/FR
1009 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1010 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1015 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1019 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1022 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1025 }else{ // AFR/FR -> AFR/FR
1028 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1029 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030 /* FIXME save sub mb types from previous frames (or derive from MVs)
1031 * so we know exactly what block size to use */
1032 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1038 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1044 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1050 l1ref0 += h->b8_stride;
1051 l1ref1 += h->b8_stride;
1052 l1mv0 += 2*b4_stride;
1053 l1mv1 += 2*b4_stride;
1057 if(h->direct_spatial_mv_pred){
1062 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1064 /* ref = min(neighbors) */
1065 for(list=0; list<2; list++){
1066 int refa = h->ref_cache[list][scan8[0] - 1];
1067 int refb = h->ref_cache[list][scan8[0] - 8];
1068 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069 if(refc == PART_NOT_AVAILABLE)
1070 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1071 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1076 if(ref[0] < 0 && ref[1] < 0){
1077 ref[0] = ref[1] = 0;
1078 mv[0][0] = mv[0][1] =
1079 mv[1][0] = mv[1][1] = 0;
1081 for(list=0; list<2; list++){
1083 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1085 mv[list][0] = mv[list][1] = 0;
1091 *mb_type &= ~MB_TYPE_L1;
1092 sub_mb_type &= ~MB_TYPE_L1;
1093 }else if(ref[0] < 0){
1095 *mb_type &= ~MB_TYPE_L0;
1096 sub_mb_type &= ~MB_TYPE_L0;
1099 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100 for(i8=0; i8<4; i8++){
1103 int xy8 = x8+y8*b8_stride;
1104 int xy4 = 3*x8+y8*b4_stride;
1107 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1109 h->sub_mb_type[i8] = sub_mb_type;
1111 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1113 if(!IS_INTRA(mb_type_col[y8])
1114 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1117 a= pack16to32(mv[0][0],mv[0][1]);
1119 b= pack16to32(mv[1][0],mv[1][1]);
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 b= pack16to32(mv[1][0],mv[1][1]);
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1127 }else if(IS_16X16(*mb_type)){
1130 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132 if(!IS_INTRA(mb_type_col[0])
1133 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1135 && (h->x264_build>33 || !h->x264_build)))){
1137 a= pack16to32(mv[0][0],mv[0][1]);
1139 b= pack16to32(mv[1][0],mv[1][1]);
1141 a= pack16to32(mv[0][0],mv[0][1]);
1142 b= pack16to32(mv[1][0],mv[1][1]);
1144 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1147 for(i8=0; i8<4; i8++){
1148 const int x8 = i8&1;
1149 const int y8 = i8>>1;
1151 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1153 h->sub_mb_type[i8] = sub_mb_type;
1155 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1161 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1162 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163 && (h->x264_build>33 || !h->x264_build)))){
1164 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165 if(IS_SUB_8X8(sub_mb_type)){
1166 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1169 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1171 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 for(i4=0; i4<4; i4++){
1175 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1178 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1180 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1186 }else{ /* direct temporal mv pred */
1187 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188 const int *dist_scale_factor = h->dist_scale_factor;
1191 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1192 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1196 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1199 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200 /* FIXME assumes direct_8x8_inference == 1 */
1201 int y_shift = 2*!IS_INTERLACED(*mb_type);
1203 for(i8=0; i8<4; i8++){
1204 const int x8 = i8&1;
1205 const int y8 = i8>>1;
1207 const int16_t (*l1mv)[2]= l1mv0;
1209 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1211 h->sub_mb_type[i8] = sub_mb_type;
1213 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214 if(IS_INTRA(mb_type_col[y8])){
1215 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 ref0 = l1ref0[x8 + y8*b8_stride];
1223 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1225 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1228 scale = dist_scale_factor[ref0];
1229 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1232 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233 int my_col = (mv_col[1]<<y_shift)/2;
1234 int mx = (scale * mv_col[0] + 128) >> 8;
1235 int my = (scale * my_col + 128) >> 8;
1236 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1237 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1243 /* one-to-one mv scaling */
1245 if(IS_16X16(*mb_type)){
1248 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249 if(IS_INTRA(mb_type_col[0])){
1252 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254 const int scale = dist_scale_factor[ref0];
1255 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1257 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1260 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1263 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1267 for(i8=0; i8<4; i8++){
1268 const int x8 = i8&1;
1269 const int y8 = i8>>1;
1271 const int16_t (*l1mv)[2]= l1mv0;
1273 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1275 h->sub_mb_type[i8] = sub_mb_type;
1276 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277 if(IS_INTRA(mb_type_col[0])){
1278 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1286 ref0 = map_col_to_list0[0][ref0];
1288 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1291 scale = dist_scale_factor[ref0];
1293 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294 if(IS_SUB_8X8(sub_mb_type)){
1295 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296 int mx = (scale * mv_col[0] + 128) >> 8;
1297 int my = (scale * mv_col[1] + 128) >> 8;
1298 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1301 for(i4=0; i4<4; i4++){
1302 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1314 static inline void write_back_motion(H264Context *h, int mb_type){
1315 MpegEncContext * const s = &h->s;
1316 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1317 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1320 if(!USES_LIST(mb_type, 0))
1321 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1323 for(list=0; list<h->list_count; list++){
1325 if(!USES_LIST(mb_type, list))
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1332 if( h->pps.cabac ) {
1333 if(IS_SKIP(mb_type))
1334 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1337 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1343 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1344 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1345 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1346 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1347 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1351 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1352 if(IS_8X8(mb_type)){
1353 uint8_t *direct_table = &h->direct_table[b8_xy];
1354 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1355 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1356 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1362 * Decodes a network abstraction layer unit.
1363 * @param consumed is the number of bytes used as input
1364 * @param length is the length of the array
1365 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1366 * @returns decoded bytes, might be src+1 if no escapes
1368 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1373 // src[0]&0x80; //forbidden bit
1374 h->nal_ref_idc= src[0]>>5;
1375 h->nal_unit_type= src[0]&0x1F;
1379 for(i=0; i<length; i++)
1380 printf("%2X ", src[i]);
1383 #ifdef HAVE_FAST_UNALIGNED
1384 # ifdef HAVE_FAST_64BIT
1386 for(i=0; i+1<length; i+=9){
1387 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1390 for(i=0; i+1<length; i+=5){
1391 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1394 if(i>0 && !src[i]) i--;
1398 for(i=0; i+1<length; i+=2){
1399 if(src[i]) continue;
1400 if(i>0 && src[i-1]==0) i--;
1402 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1404 /* startcode, so we must be past the end */
1412 if(i>=length-1){ //no escaped 0
1413 *dst_length= length;
1414 *consumed= length+1; //+1 for the header
1418 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1419 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1420 dst= h->rbsp_buffer[bufidx];
1426 //printf("decoding esc\n");
1427 memcpy(dst, src, i);
1430 //remove escapes (very rare 1:2^22)
1432 dst[di++]= src[si++];
1433 dst[di++]= src[si++];
1434 }else if(src[si]==0 && src[si+1]==0){
1435 if(src[si+2]==3){ //escape
1440 }else //next start code
1444 dst[di++]= src[si++];
1447 dst[di++]= src[si++];
1450 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1453 *consumed= si + 1;//+1 for the header
1454 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1459 * identifies the exact end of the bitstream
1460 * @return the length of the trailing, or 0 if damaged
1462 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1466 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1476 * IDCT transforms the 16 dc values and dequantizes them.
1477 * @param qp quantization parameter
1479 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1482 int temp[16]; //FIXME check if this is a good idea
1483 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1484 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1486 //memset(block, 64, 2*256);
1489 const int offset= y_offset[i];
1490 const int z0= block[offset+stride*0] + block[offset+stride*4];
1491 const int z1= block[offset+stride*0] - block[offset+stride*4];
1492 const int z2= block[offset+stride*1] - block[offset+stride*5];
1493 const int z3= block[offset+stride*1] + block[offset+stride*5];
1502 const int offset= x_offset[i];
1503 const int z0= temp[4*0+i] + temp[4*2+i];
1504 const int z1= temp[4*0+i] - temp[4*2+i];
1505 const int z2= temp[4*1+i] - temp[4*3+i];
1506 const int z3= temp[4*1+i] + temp[4*3+i];
1508 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1509 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1510 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1511 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1517 * DCT transforms the 16 dc values.
1518 * @param qp quantization parameter ??? FIXME
1520 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1521 // const int qmul= dequant_coeff[qp][0];
1523 int temp[16]; //FIXME check if this is a good idea
1524 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1525 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1528 const int offset= y_offset[i];
1529 const int z0= block[offset+stride*0] + block[offset+stride*4];
1530 const int z1= block[offset+stride*0] - block[offset+stride*4];
1531 const int z2= block[offset+stride*1] - block[offset+stride*5];
1532 const int z3= block[offset+stride*1] + block[offset+stride*5];
1541 const int offset= x_offset[i];
1542 const int z0= temp[4*0+i] + temp[4*2+i];
1543 const int z1= temp[4*0+i] - temp[4*2+i];
1544 const int z2= temp[4*1+i] - temp[4*3+i];
1545 const int z3= temp[4*1+i] + temp[4*3+i];
1547 block[stride*0 +offset]= (z0 + z3)>>1;
1548 block[stride*2 +offset]= (z1 + z2)>>1;
1549 block[stride*8 +offset]= (z1 - z2)>>1;
1550 block[stride*10+offset]= (z0 - z3)>>1;
1558 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1559 const int stride= 16*2;
1560 const int xStride= 16;
1563 a= block[stride*0 + xStride*0];
1564 b= block[stride*0 + xStride*1];
1565 c= block[stride*1 + xStride*0];
1566 d= block[stride*1 + xStride*1];
1573 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1574 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1575 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1576 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1580 static void chroma_dc_dct_c(DCTELEM *block){
1581 const int stride= 16*2;
1582 const int xStride= 16;
1585 a= block[stride*0 + xStride*0];
1586 b= block[stride*0 + xStride*1];
1587 c= block[stride*1 + xStride*0];
1588 d= block[stride*1 + xStride*1];
1595 block[stride*0 + xStride*0]= (a+c);
1596 block[stride*0 + xStride*1]= (e+b);
1597 block[stride*1 + xStride*0]= (a-c);
1598 block[stride*1 + xStride*1]= (e-b);
1603 * gets the chroma qp.
1605 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1606 return h->pps.chroma_qp_table[t][qscale];
1609 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1610 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1611 int src_x_offset, int src_y_offset,
1612 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1613 MpegEncContext * const s = &h->s;
1614 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1615 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1616 const int luma_xy= (mx&3) + ((my&3)<<2);
1617 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1618 uint8_t * src_cb, * src_cr;
1619 int extra_width= h->emu_edge_width;
1620 int extra_height= h->emu_edge_height;
1622 const int full_mx= mx>>2;
1623 const int full_my= my>>2;
1624 const int pic_width = 16*s->mb_width;
1625 const int pic_height = 16*s->mb_height >> MB_FIELD;
1627 if(mx&7) extra_width -= 3;
1628 if(my&7) extra_height -= 3;
1630 if( full_mx < 0-extra_width
1631 || full_my < 0-extra_height
1632 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1633 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1634 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1635 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1639 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1641 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1644 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1647 // chroma offset when predicting from a field of opposite parity
1648 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1649 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1651 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1652 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1655 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1656 src_cb= s->edge_emu_buffer;
1658 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1661 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1662 src_cr= s->edge_emu_buffer;
1664 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1667 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1668 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1669 int x_offset, int y_offset,
1670 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1671 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1672 int list0, int list1){
1673 MpegEncContext * const s = &h->s;
1674 qpel_mc_func *qpix_op= qpix_put;
1675 h264_chroma_mc_func chroma_op= chroma_put;
1677 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1678 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1679 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1680 x_offset += 8*s->mb_x;
1681 y_offset += 8*(s->mb_y >> MB_FIELD);
1684 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1685 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1686 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1687 qpix_op, chroma_op);
1690 chroma_op= chroma_avg;
1694 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1695 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1696 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1697 qpix_op, chroma_op);
1701 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1702 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703 int x_offset, int y_offset,
1704 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1706 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1707 int list0, int list1){
1708 MpegEncContext * const s = &h->s;
1710 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1711 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1712 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1713 x_offset += 8*s->mb_x;
1714 y_offset += 8*(s->mb_y >> MB_FIELD);
1717 /* don't optimize for luma-only case, since B-frames usually
1718 * use implicit weights => chroma too. */
1719 uint8_t *tmp_cb = s->obmc_scratchpad;
1720 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1721 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1722 int refn0 = h->ref_cache[0][ scan8[n] ];
1723 int refn1 = h->ref_cache[1][ scan8[n] ];
1725 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1726 dest_y, dest_cb, dest_cr,
1727 x_offset, y_offset, qpix_put, chroma_put);
1728 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1729 tmp_y, tmp_cb, tmp_cr,
1730 x_offset, y_offset, qpix_put, chroma_put);
1732 if(h->use_weight == 2){
1733 int weight0 = h->implicit_weight[refn0][refn1];
1734 int weight1 = 64 - weight0;
1735 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1736 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1739 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1740 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1741 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1742 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1743 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1744 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1745 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1747 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1750 int list = list1 ? 1 : 0;
1751 int refn = h->ref_cache[list][ scan8[n] ];
1752 Picture *ref= &h->ref_list[list][refn];
1753 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1754 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1755 qpix_put, chroma_put);
1757 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1758 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1759 if(h->use_weight_chroma){
1760 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1761 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1762 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1763 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1768 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1773 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1774 int list0, int list1){
1775 if((h->use_weight==2 && list0 && list1
1776 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1777 || h->use_weight==1)
1778 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1779 x_offset, y_offset, qpix_put, chroma_put,
1780 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1782 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1783 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1786 static inline void prefetch_motion(H264Context *h, int list){
1787 /* fetch pixels for estimated mv 4 macroblocks ahead
1788 * optimized for 64byte cache lines */
1789 MpegEncContext * const s = &h->s;
1790 const int refn = h->ref_cache[list][scan8[0]];
1792 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1793 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1794 uint8_t **src= h->ref_list[list][refn].data;
1795 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1796 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1797 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1798 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1802 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1803 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1804 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1805 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1806 MpegEncContext * const s = &h->s;
1807 const int mb_xy= h->mb_xy;
1808 const int mb_type= s->current_picture.mb_type[mb_xy];
1810 assert(IS_INTER(mb_type));
1812 prefetch_motion(h, 0);
1814 if(IS_16X16(mb_type)){
1815 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1816 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1817 &weight_op[0], &weight_avg[0],
1818 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1819 }else if(IS_16X8(mb_type)){
1820 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1821 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1822 &weight_op[1], &weight_avg[1],
1823 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1825 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1826 &weight_op[1], &weight_avg[1],
1827 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1828 }else if(IS_8X16(mb_type)){
1829 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1830 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1831 &weight_op[2], &weight_avg[2],
1832 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1833 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 &weight_op[2], &weight_avg[2],
1836 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1840 assert(IS_8X8(mb_type));
1843 const int sub_mb_type= h->sub_mb_type[i];
1845 int x_offset= (i&1)<<2;
1846 int y_offset= (i&2)<<1;
1848 if(IS_SUB_8X8(sub_mb_type)){
1849 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1850 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1851 &weight_op[3], &weight_avg[3],
1852 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853 }else if(IS_SUB_8X4(sub_mb_type)){
1854 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1856 &weight_op[4], &weight_avg[4],
1857 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1859 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1860 &weight_op[4], &weight_avg[4],
1861 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862 }else if(IS_SUB_4X8(sub_mb_type)){
1863 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1864 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1865 &weight_op[5], &weight_avg[5],
1866 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1868 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1869 &weight_op[5], &weight_avg[5],
1870 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1873 assert(IS_SUB_4X4(sub_mb_type));
1875 int sub_x_offset= x_offset + 2*(j&1);
1876 int sub_y_offset= y_offset + (j&2);
1877 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1878 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1879 &weight_op[6], &weight_avg[6],
1880 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1886 prefetch_motion(h, 1);
/* Precompute the CAVLC coefficient-level lookup table.
 * For each suffix_length (0..6) and each LEVEL_TAB_BITS-wide bitstream
 * window i, cavlc_level_tab[sl][i][0] is the decoded level (or an escape
 * marker >= 100) and [1] is the number of bits consumed.
 * NOTE(review): several lines (declarations/closing braces) are elided in
 * this excerpt; `i` is declared in one of the missing lines. */
1889 static av_cold void init_cavlc_level_tab(void){
1890 int suffix_length, mask;
1893 for(suffix_length=0; suffix_length<7; suffix_length++){
1894 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* prefix = count of leading zero bits before the marker '1' bit in the window */
1895 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1896 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* conditional negation: odd level_codes map to negative levels, even to positive */
1898 mask= -(level_code&1);
1899 level_code= (((2+level_code)>>1) ^ mask) - mask;
1900 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
/* whole code fits in the window: store the final signed level and its length */
1901 cavlc_level_tab[suffix_length][i][0]= level_code;
1902 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1903 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* only the prefix fits: escape value prefix+100, caller must read the suffix itself */
1904 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1905 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
/* (else branch, brace elided) prefix longer than the window: generic escape */
1907 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1908 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables.
 * Uses preallocated static storage (INIT_VLC_USE_NEW_STATIC): each VLC's
 * .table/.table_allocated are pointed at the file-scope arrays before
 * init_vlc() fills them. Guarded by the `done` flag (check elided here). */
1914 static av_cold void decode_init_vlc(void){
1915 static int done = 0;
/* chroma DC coeff_token: 4*5 codes */
1922 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1923 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1924 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1925 &chroma_dc_coeff_token_len [0], 1, 1,
1926 &chroma_dc_coeff_token_bits[0], 1, 1,
1927 INIT_VLC_USE_NEW_STATIC);
/* luma coeff_token: 4 context tables packed back-to-back in one array,
 * `offset` walks the packed storage (loop header elided) */
1931 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1932 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1933 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1934 &coeff_token_len [i][0], 1, 1,
1935 &coeff_token_bits[i][0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1937 offset += coeff_token_vlc_tables_size[i];
1940 * This is a one time safety check to make sure that
1941 * the packed static coeff_token_vlc table sizes
1942 * were initialized correctly.
1944 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1947 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1948 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1949 init_vlc(&chroma_dc_total_zeros_vlc[i],
1950 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1951 &chroma_dc_total_zeros_len [i][0], 1, 1,
1952 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1953 INIT_VLC_USE_NEW_STATIC);
/* 15 total_zeros tables, one per possible total_coeff-1 */
1955 for(i=0; i<15; i++){
1956 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1957 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1958 init_vlc(&total_zeros_vlc[i],
1959 TOTAL_ZEROS_VLC_BITS, 16,
1960 &total_zeros_len [i][0], 1, 1,
1961 &total_zeros_bits[i][0], 1, 1,
1962 INIT_VLC_USE_NEW_STATIC);
/* run_before tables for zeros_left 1..6 (loop header elided) */
1966 run_vlc[i].table = run_vlc_tables[i];
1967 run_vlc[i].table_allocated = run_vlc_tables_size;
1968 init_vlc(&run_vlc[i],
1970 &run_len [i][0], 1, 1,
1971 &run_bits[i][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): trailing comma below is the comma operator, not a typo in
 * effect — it behaves like ';' here, but should be a semicolon for clarity */
1974 run7_vlc.table = run7_vlc_table,
1975 run7_vlc.table_allocated = run7_vlc_table_size;
1976 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1977 &run_len [6][0], 1, 1,
1978 &run_bits[6][0], 1, 1,
1979 INIT_VLC_USE_NEW_STATIC);
1981 init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init().
 * av_freep() NULLs each pointer, so a later re-allocation or repeated call
 * is safe. The trailing loop releases the per-thread-context buffers
 * (top_borders, obmc_scratchpad); declarations of `i`/`hx` and a NULL
 * check on hx are in lines elided from this excerpt. */
1985 static void free_tables(H264Context *h){
1988 av_freep(&h->intra4x4_pred_mode);
1989 av_freep(&h->chroma_pred_mode_table);
1990 av_freep(&h->cbp_table);
1991 av_freep(&h->mvd_table[0]);
1992 av_freep(&h->mvd_table[1]);
1993 av_freep(&h->direct_table);
1994 av_freep(&h->non_zero_count);
1995 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (see alloc_tables); clear it too */
1996 h->slice_table= NULL;
1998 av_freep(&h->mb2b_xy);
1999 av_freep(&h->mb2b8_xy);
2001 for(i = 0; i < h->s.avctx->thread_count; i++) {
2002 hx = h->thread_context[i];
2004 av_freep(&hx->top_borders[1]);
2005 av_freep(&hx->top_borders[0]);
2006 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per QP 0..51) for both
 * scaling-matrix contexts. If the two PPS 8x8 scaling matrices are equal,
 * context 1 aliases context 0's buffer instead of being recomputed.
 * `transpose` adapts the coefficient layout to the IDCT implementation. */
2010 static void init_dequant8_coeff_table(H264Context *h){
2012 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2013 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2014 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2016 for(i=0; i<2; i++ ){
/* identical matrices: share buffer 0 and (in elided lines) skip recompute */
2017 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2018 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2022 for(q=0; q<52; q++){
/* shift = q/6; per-coefficient base value scaled by the PPS scaling matrix */
2023 int shift = div6[q];
2026 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2027 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2028 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (QP 0..51) for the 6 scaling-matrix
 * contexts (intra/inter x Y/Cb/Cr). Contexts with identical PPS scaling
 * matrices share one buffer instead of being recomputed. */
2033 static void init_dequant4_coeff_table(H264Context *h){
2035 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2036 for(i=0; i<6; i++ ){
2037 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* scan earlier contexts j<i (loop header elided) for an identical matrix to alias */
2039 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2040 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2047 for(q=0; q<52; q++){
/* shift = q/6 + 2; base dequant coefficient times the scaling-matrix entry */
2048 int shift = div6[q] + 2;
2051 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2052 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2053 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS: always the 4x4
 * tables, the 8x8 tables only when the PPS enables 8x8 transforms.
 * With lossless transform_bypass, QP 0 entries are forced to the neutral
 * value 1<<6 so dequantization becomes an identity at that QP. */
2058 static void init_dequant_tables(H264Context *h){
2060 init_dequant4_coeff_table(h);
2061 if(h->pps.transform_8x8_mode)
2062 init_dequant8_coeff_table(h);
2063 if(h->sps.transform_bypass){
2066 h->dequant4_coeff[i][0][x] = 1<<6;
2067 if(h->pps.transform_8x8_mode)
2070 h->dequant8_coeff[i][0][x] = 1<<6;
2077 * needs width/height
/* Allocate all per-picture-size decoder tables; returns 0 on success,
 * with CHECKED_ALLOCZ jumping to an (elided) fail label on OOM.
 * big_mb_num includes one extra stride row as a guard band. */
2079 static int alloc_tables(H264Context *h){
2080 MpegEncContext * const s = &h->s;
2081 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2084 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2086 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2087 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2088 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2090 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2091 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2092 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2093 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* slice_table entries default to -1 ("no slice"); the working pointer is
 * offset past the guard band so x-1/y-1 neighbour accesses stay in bounds */
2095 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2096 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock-index -> motion-vector-block / 8x8-block index maps */
2098 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2099 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2100 for(y=0; y<s->mb_height; y++){
2101 for(x=0; x<s->mb_width; x++){
2102 const int mb_xy= x + y*s->mb_stride;
2103 const int b_xy = 4*x + 4*y*h->b_stride;
2104 const int b8_xy= 2*x + 2*y*h->b8_stride;
2106 h->mb2b_xy [mb_xy]= b_xy;
2107 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is linesize-dependent, allocated later in frame_start() */
2111 s->obmc_scratchpad = NULL;
2113 if(!h->dequant4_coeff[0])
2114 init_dequant_tables(h);
2123 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() with a slice-thread context:
 * dst gets pointer copies (no ownership), so only the original context may
 * free them. The per-thread scratchpad stays NULL until frame_start(). */
2125 static void clone_tables(H264Context *dst, H264Context *src){
2126 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2127 dst->non_zero_count = src->non_zero_count;
2128 dst->slice_table = src->slice_table;
2129 dst->cbp_table = src->cbp_table;
2130 dst->mb2b_xy = src->mb2b_xy;
2131 dst->mb2b8_xy = src->mb2b8_xy;
2132 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2133 dst->mvd_table[0] = src->mvd_table[0];
2134 dst->mvd_table[1] = src->mvd_table[1];
2135 dst->direct_table = src->direct_table;
2137 dst->s.obmc_scratchpad = NULL;
2138 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2143 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the per-thread top_borders rows (16 luma + 8 + 8 chroma bytes
 * per macroblock column). Returns 0 on success; CHECKED_ALLOCZ jumps to
 * the fail path below on OOM. */
2145 static int context_init(H264Context *h){
2146 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2147 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2151 return -1; // free_tables will clean up for us
/* Initialization shared by the H.264 decoder and related codecs: copy
 * dimensions/codec id from the AVCodecContext, set up intra prediction
 * function pointers, and default both scaling matrices to flat 16
 * (i.e. no scaling) until a PPS/SPS overrides them. */
2154 static av_cold void common_init(H264Context *h){
2155 MpegEncContext * const s = &h->s;
2157 s->width = s->avctx->width;
2158 s->height = s->avctx->height;
2159 s->codec_id= s->avctx->codec->id;
2161 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS applied yet", forces dequant table init on first slice */
2163 h->dequant_coeff_pps= -1;
2164 s->unrestricted_mv=1;
2165 s->decode=1; //FIXME
2167 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2169 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2170 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init: set up an H264Context for decoding. Chooses the output
 * pixel format per codec variant and detects AVC-style ("avcC")
 * extradata by its leading version byte 1. */
2173 static av_cold int decode_init(AVCodecContext *avctx){
2174 H264Context *h= avctx->priv_data;
2175 MpegEncContext * const s = &h->s;
2177 MPV_decode_defaults(s);
2182 s->out_format = FMT_H264;
2183 s->workaround_bugs= avctx->workaround_bugs;
2186 // s->decode_mb= ff_h263_decode_mb;
2187 s->quarter_sample = 1;
2190 if(avctx->codec_id == CODEC_ID_SVQ3)
2191 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2192 else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
2193 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2195 avctx->pix_fmt= PIX_FMT_YUV420P;
/* avcC extradata begins with configurationVersion == 1; handling elided here */
2199 if(avctx->extradata_size > 0 && avctx->extradata &&
2200 *(char *)avctx->extradata == 1){
2207 h->thread_context[0] = h;
/* sentinel values so the first real POC always compares as newer */
2208 h->outputed_poc = INT_MIN;
2209 h->prev_poc_msb= 1<<16;
/* Per-frame setup: start the MPV frame and error resilience, precompute
 * the block_offset tables (frame and field variants), allocate the
 * linesize-dependent per-thread scratchpads, and reset slice/POC state. */
2213 static int frame_start(H264Context *h){
2214 MpegEncContext * const s = &h->s;
2217 if(MPV_frame_start(s, s->avctx) < 0)
2219 ff_er_frame_start(s);
2221 * MPV_frame_start uses pict_type to derive key_frame.
2222 * This is incorrect for H.264; IDR markings must be used.
2223 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2224 * See decode_nal_units().
2226 s->current_picture_ptr->key_frame= 0;
2228 assert(s->linesize && s->uvlinesize);
/* luma offsets: [0..15] frame (4*linesize rows), [24..39] field (8*linesize) */
2230 for(i=0; i<16; i++){
2231 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2232 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets: Cb at [16..], Cr at [20..], field variants at +24 */
2235 h->block_offset[16+i]=
2236 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2237 h->block_offset[24+16+i]=
2238 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2241 /* can't be in alloc_tables because linesize isn't known there.
2242 * FIXME: redo bipred weight to not require extra buffer? */
/* NOTE(review): av_malloc result is not checked here — OOM would leave a
 * NULL scratchpad for later use; confirm whether callers guard against it */
2243 for(i = 0; i < s->avctx->thread_count; i++)
2244 if(!h->thread_context[i]->s.obmc_scratchpad)
2245 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2247 /* some macroblocks will be accessed before they're available */
2248 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2249 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2251 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2253 // We mark the current picture as non-reference after allocating it, so
2254 // that if we break out due to an error it can be released automatically
2255 // in the next MPV_frame_start().
2256 // SVQ3 as well as most other codecs have only last/next/current and thus
2257 // get released even with set reference, besides SVQ3 and others do not
2258 // mark frames as reference later "naturally".
2259 if(s->codec_id != CODEC_ID_SVQ3)
2260 s->current_picture_ptr->reference= 0;
/* POCs unknown until the slice header is parsed */
2262 s->current_picture_ptr->field_poc[0]=
2263 s->current_picture_ptr->field_poc[1]= INT_MAX;
2264 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row of the just-reconstructed macroblock into
 * top_borders[] and its right column into left_border[], so neighbouring
 * macroblocks' deblocking can still read pre-filter pixels. The MBAFF
 * branch keeps two border lines per MB pair (offset/step select which). */
2269 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2270 MpegEncContext * const s = &h->s;
2279 src_cb -= uvlinesize;
2280 src_cr -= uvlinesize;
2282 if(!simple && FRAME_MBAFF){
2284 offset = MB_MBAFF ? 1 : 17;
2285 uvoffset= MB_MBAFF ? 1 : 9;
/* save row 15 (luma) / row 7 (chroma) of the top field into border slot 0 */
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2289 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2290 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2291 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2296 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2297 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2298 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2299 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2305 top_idx = MB_MBAFF ? 0 : 1;
2307 step= MB_MBAFF ? 2 : 1;
2310 // There are two lines saved, the line above the top macroblock of a pair,
2311 // and the line above the bottom macroblock
/* right column (x==15) of this MB becomes the left border of the next one */
2312 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2313 for(i=1; i<17 - skiplast; i++){
2314 h->left_border[offset+i*step]= src_y[15+i* linesize];
2317 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2318 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2320 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2321 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2322 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2323 for(i=1; i<9 - skiplast; i++){
2324 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2325 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2327 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2328 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap the saved (pre-deblock) border pixels with the current macroblock's
 * edge pixels. Called with xchg=1 before intra prediction and xchg=0 after
 * (see hl_decode_mb_internal), so prediction reads unfiltered neighbours
 * even though deblocking has already run on them. */
2332 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2333 MpegEncContext * const s = &h->s;
2344 if(!simple && FRAME_MBAFF){
2346 offset = MB_MBAFF ? 1 : 17;
2347 uvoffset= MB_MBAFF ? 1 : 9;
2351 top_idx = MB_MBAFF ? 0 : 1;
2353 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter only within the slice, so check slice ids */
2356 if(h->deblocking_filter == 2) {
2358 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2359 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2361 deblock_left = (s->mb_x > 0);
2362 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one row and one column to address the border pixels */
2365 src_y -= linesize + 1;
2366 src_cb -= uvlinesize + 1;
2367 src_cr -= uvlinesize + 1;
/* XCHG(a,b,t,xchg): swap a<->b when xchg, else copy b->a (body elided) */
2369 #define XCHG(a,b,t,xchg)\
2376 for(i = !deblock_top; i<16; i++){
2377 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2379 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2383 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2384 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbour pixels come from the next MB column's border */
2385 if(s->mb_x+1 < s->mb_width){
2386 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2390 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2392 for(i = !deblock_top; i<8; i++){
2393 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2394 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2396 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2397 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2401 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture.
 * `simple` is a compile-time flag (always-inline) that strips the
 * uncommon paths: MBAFF/field handling, intra PCM, gray mode, SVQ3 and
 * lossless transform_bypass. The flow is:
 *   1. compute destination pointers and (MBAFF) field addressing,
 *   2. intra PCM copy, OR intra prediction + residual IDCT, OR inter
 *      motion compensation (hl_motion) + residual IDCT,
 *   3. chroma reconstruction,
 *   4. border backup + deblocking preparation/filtering. */
2406 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2407 MpegEncContext * const s = &h->s;
2408 const int mb_x= s->mb_x;
2409 const int mb_y= s->mb_y;
2410 const int mb_xy= h->mb_xy;
2411 const int mb_type= s->current_picture.mb_type[mb_xy];
2412 uint8_t *dest_y, *dest_cb, *dest_cr;
2413 int linesize, uvlinesize /*dct_offset*/;
2415 int *block_offset = &h->block_offset[0];
2416 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2417 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2418 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2419 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2421 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2422 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2423 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2425 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2426 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides and use the field block_offset set */
2428 if (!simple && MB_FIELD) {
2429 linesize = h->mb_linesize = s->linesize * 2;
2430 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2431 block_offset = &h->block_offset[24];
2432 if(mb_y&1){ //FIXME move out of this function?
2433 dest_y -= s->linesize*15;
2434 dest_cb-= s->uvlinesize*7;
2435 dest_cr-= s->uvlinesize*7;
/* remap ref_cache indices for field parity (MBAFF) */
2439 for(list=0; list<h->list_count; list++){
2440 if(!USES_LIST(mb_type, list))
2442 if(IS_16X16(mb_type)){
2443 int8_t *ref = &h->ref_cache[list][scan8[0]];
2444 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2446 for(i=0; i<16; i+=4){
2447 int ref = h->ref_cache[list][scan8[i]];
2449 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2455 linesize = h->mb_linesize = s->linesize;
2456 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2457 // dct_offset = s->linesize * 16;
/* intra PCM: raw samples were stored in h->mb by the parser; just copy */
2460 if (!simple && IS_INTRA_PCM(mb_type)) {
2461 for (i=0; i<16; i++) {
2462 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2464 for (i=0; i<8; i++) {
2465 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2466 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2469 if(IS_INTRA(mb_type)){
/* expose unfiltered neighbour pixels to intra prediction (undone below) */
2470 if(h->deblocking_filter)
2471 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2473 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2474 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2475 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2478 if(IS_INTRA4x4(mb_type)){
2479 if(simple || !s->encoding){
2480 if(IS_8x8DCT(mb_type)){
2481 if(transform_bypass){
2483 idct_add = s->dsp.add_pixels8;
2485 idct_dc_add = s->dsp.h264_idct8_dc_add;
2486 idct_add = s->dsp.h264_idct8_add;
/* 4 8x8 luma blocks: predict, then add residual (DC-only fast path) */
2488 for(i=0; i<16; i+=4){
2489 uint8_t * const ptr= dest_y + block_offset[i];
2490 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2491 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2492 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2494 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2495 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2496 (h->topright_samples_available<<i)&0x4000, linesize);
2498 if(nnz == 1 && h->mb[i*16])
2499 idct_dc_add(ptr, h->mb + i*16, linesize);
2501 idct_add (ptr, h->mb + i*16, linesize);
2506 if(transform_bypass){
2508 idct_add = s->dsp.add_pixels4;
2510 idct_dc_add = s->dsp.h264_idct_dc_add;
2511 idct_add = s->dsp.h264_idct_add;
/* 16 4x4 luma blocks */
2513 for(i=0; i<16; i++){
2514 uint8_t * const ptr= dest_y + block_offset[i];
2515 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2517 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2518 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* down-left / vertical-left prediction needs top-right samples;
 * if unavailable, replicate the last available top pixel */
2522 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2523 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2524 assert(mb_y || linesize <= block_offset[i]);
2525 if(!topright_avail){
2526 tr= ptr[3 - linesize]*0x01010101;
2527 topright= (uint8_t*) &tr;
2529 topright= ptr + 4 - linesize;
2533 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2534 nnz = h->non_zero_count_cache[ scan8[i] ];
2537 if(nnz == 1 && h->mb[i*16])
2538 idct_dc_add(ptr, h->mb + i*16, linesize);
2540 idct_add (ptr, h->mb + i*16, linesize);
2542 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: full-MB prediction, then dequant+IDCT of the luma DC plane */
2549 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2551 if(!transform_bypass)
2552 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2554 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* restore the filtered border pixels swapped out above */
2556 if(h->deblocking_filter)
2557 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation (elided else-branch) */
2559 hl_motion(h, dest_y, dest_cb, dest_cr,
2560 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2561 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2562 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the luma residual for non-intra4x4 MBs (intra4x4 added it inline) */
2566 if(!IS_INTRA4x4(mb_type)){
2568 if(IS_INTRA16x16(mb_type)){
2569 if(transform_bypass){
2570 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2571 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2573 for(i=0; i<16; i++){
2574 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2575 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2579 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2581 }else if(h->cbp&15){
2582 if(transform_bypass){
2583 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2584 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2585 for(i=0; i<16; i+=di){
2586 if(h->non_zero_count_cache[ scan8[i] ]){
2587 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 if(IS_8x8DCT(mb_type)){
2592 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2594 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 residual path */
2599 for(i=0; i<16; i++){
2600 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2601 uint8_t * const ptr= dest_y + block_offset[i];
2602 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: cbp bits 4-5 signal chroma coefficients */
2608 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2609 uint8_t *dest[2] = {dest_cb, dest_cr};
2610 if(transform_bypass){
2611 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2612 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2613 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2615 idct_add = s->dsp.add_pixels4;
2616 for(i=16; i<16+8; i++){
2617 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2618 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC 2x2 transform, separate QP per plane */
2622 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2623 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2625 idct_add = s->dsp.h264_idct_add;
2626 idct_dc_add = s->dsp.h264_idct_dc_add;
2627 for(i=16; i<16+8; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2634 for(i=16; i<16+8; i++){
2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2636 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2637 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2644 if(h->cbp || IS_INTRA(mb_type))
2645 s->dsp.clear_blocks(h->mb);
/* save pre-filter borders, then run the in-loop deblocking filter */
2647 if(h->deblocking_filter) {
2648 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2649 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2650 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2651 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2652 if (!simple && FRAME_MBAFF) {
2653 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2655 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2661 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path specialization: simple=1 lets the always-inline internal
 * function compile out MBAFF/PCM/gray/bypass handling. */
2663 static void hl_decode_mb_simple(H264Context *h){
2664 hl_decode_mb_internal(h, 1);
2668 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full-path specialization (simple=0); av_noinline keeps this rare path
 * out of the hot caller's code. */
2670 static void av_noinline hl_decode_mb_complex(H264Context *h){
2671 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant,
 * based on per-context and per-MB conditions (MBAFF/field handled via
 * h->is_complex; PCM and lossless QP==0 force the complex path). */
2674 static void hl_decode_mb(H264Context *h){
2675 MpegEncContext * const s = &h->s;
2676 const int mb_xy= h->mb_xy;
2677 const int mb_type= s->current_picture.mb_type[mb_xy];
2678 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2680 if(ENABLE_H264_ENCODER && !s->decode)
2684 hl_decode_mb_complex(h);
2685 else hl_decode_mb_simple(h);
/* Convert a frame Picture, in place, into a single-field view: for the
 * bottom field shift each plane down one line, double the strides so only
 * that field's lines are addressed, and take the matching field POC. */
2688 static void pic_as_field(Picture *pic, const int parity){
2690 for (i = 0; i < 4; ++i) {
2691 if (parity == PICT_BOTTOM_FIELD)
2692 pic->data[i] += pic->linesize[i];
2693 pic->reference = parity;
2694 pic->linesize[i] *= 2;
2696 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is a reference of the requested parity,
 * converting it to a field view when parity is a single field, and bias
 * pic_id by id_add. Returns whether the parity matched (copy performed
 * in elided lines guarded by `match`). */
2699 static int split_field_copy(Picture *dest, Picture *src,
2700 int parity, int id_add){
2701 int match = !!(src->reference & parity);
2705 if(parity != PICT_FRAME){
2706 pic_as_field(dest, parity);
2708 dest->pic_id += id_add;
/* Append entries from `in` to the default reference list `def`,
 * alternating between pictures whose reference flags match parity `sel`
 * and those matching the opposite parity (sel^3), as field list
 * construction requires. pic_id is set to the long-term index or
 * frame_num. Returns the number of entries written (return elided). */
2715 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2719 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture usable for its parity */
2720 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2722 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2725 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2726 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2729 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2730 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort the short-term pictures on one side of `limit` by POC
 * into `sorted`: dir=0 picks POCs <= limit in descending order, dir=1
 * picks POCs > limit in ascending order (the XOR trick flips both
 * comparisons). Returns the number of pictures added (return elided). */
2737 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2742 best_poc= dir ? INT_MIN : INT_MAX;
2744 for(i=0; i<len; i++){
2745 const int poc= src[i]->poc;
2746 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2748 sorted[out_i]= src[i];
/* no candidate found this pass: done */
2751 if(best_poc == (dir ? INT_MIN : INT_MAX))
/* tighten the limit so the next pass finds the next-closest POC */
2753 limit= sorted[out_i++]->poc - dir;
2759 * fills the default_ref_list.
/* Build the default reference lists. For B slices: short-term refs are
 * POC-sorted around the current POC (past-first for L0, future-first for
 * L1) then long-term refs appended; if L0 and L1 come out identical, the
 * first two L1 entries are swapped. For P slices: short-term then
 * long-term, in marking order. */
2761 static int fill_default_ref_list(H264Context *h){
2762 MpegEncContext * const s = &h->s;
2765 if(h->slice_type_nos==FF_B_TYPE){
2766 Picture *sorted[32];
/* for a field picture, compare against the current field's POC */
2771 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2773 cur_poc= s->current_picture_ptr->poc;
2775 for(list= 0; list<2; list++){
2776 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2777 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2779 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2780 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2783 if(len < h->ref_count[list])
2784 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2788 if(lens[0] == lens[1] && lens[1] > 1){
/* NOTE(review): the element access is evaluated before the i<lens[0]
 * bound check in this && chain — reads one element past the compared
 * range when the lists are fully identical; confirm len bounds */
2789 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2791 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2794 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2795 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2797 if(len < h->ref_count[0])
2798 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2801 for (i=0; i<h->ref_count[0]; i++) {
2802 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2804 if(h->slice_type_nos==FF_B_TYPE){
2805 for (i=0; i<h->ref_count[1]; i++) {
2806 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2813 static void print_short_term(H264Context *h);
2814 static void print_long_term(H264Context *h);
2817 * Extract structure information about the picture described by pic_num in
2818 * the current decoding context (frame or field). Note that pic_num is
2819 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2820 * @param pic_num picture number for which to extract structure information
2821 * @param structure one of PICT_XXX describing structure of picture
2823 * @return frame number (short term) or long term index of picture
2824 * described by pic_num
2826 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2827 MpegEncContext * const s = &h->s;
/* default to the current picture's structure; the (elided) field-coding
 * branch flips it for odd pic_nums, which address the opposite field */
2829 *structure = s->picture_structure;
2832 /* opposite field */
2833 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering from the slice header and permute
 * h->ref_list accordingly: start from the default lists, then for each
 * reordering command locate the referenced short-term (by frame_num) or
 * long-term (by index) picture and move it to the current position,
 * shifting the rest down. Returns 0 on success, -1 on bitstream errors
 * (returns elided in this excerpt). */
2840 static int decode_ref_pic_list_reordering(H264Context *h){
2841 MpegEncContext * const s = &h->s;
2842 int list, index, pic_structure;
2844 print_short_term(h);
2847 for(list=0; list<h->list_count; list++){
2848 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag */
2850 if(get_bits1(&s->gb)){
2851 int pred= h->curr_pic_num;
2853 for(index=0; ; index++){
2854 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2855 unsigned int pic_id;
2857 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
2859 if(reordering_of_pic_nums_idc==3)
2862 if(index >= h->ref_count[list]){
2863 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2867 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term, signalled as +/- difference from pred */
2868 if(reordering_of_pic_nums_idc<2){
2869 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2872 if(abs_diff_pic_num > h->max_pic_num){
2873 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2877 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2878 else pred+= abs_diff_pic_num;
/* wrap pic_num into [0, max_pic_num) */
2879 pred &= h->max_pic_num - 1;
2881 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term list newest-first for matching frame_num+parity */
2883 for(i= h->short_ref_count-1; i>=0; i--){
2884 ref = h->short_ref[i];
2885 assert(ref->reference);
2886 assert(!ref->long_ref);
2888 ref->frame_num == frame_num &&
2889 (ref->reference & pic_structure)
/* idc 2: long-term reference addressed by index */
2897 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2899 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2902 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2905 ref = h->long_ref[long_idx];
2906 assert(!(ref && !ref->reference));
2907 if(ref && (ref->reference & pic_structure)){
2908 ref->pic_id= pic_id;
2909 assert(ref->long_ref);
2917 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2918 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert: find any existing occurrence, shift entries down, place ref */
2920 for(i=index; i+1<h->ref_count[list]; i++){
2921 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2924 for(; i > index; i--){
2925 h->ref_list[list][i]= h->ref_list[list][i-1];
2927 h->ref_list[list][index]= *ref;
2929 pic_as_field(&h->ref_list[list][index], pic_structure);
2933 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: substitute the current picture for any hole */
2939 for(list=0; list<h->list_count; list++){
2940 for(index= 0; index < h->ref_count[list]; index++){
2941 if(!h->ref_list[list][index].data[0]){
2942 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2943 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries from each frame
 * reference: entries [16+2*i] (top field) and [16+2*i+1] (bottom field)
 * get doubled strides, field POCs, and copies of the frame's prediction
 * weights; implicit bipred weights are duplicated the same way. */
2951 static void fill_mbaff_ref_list(H264Context *h){
2953 for(list=0; list<2; list++){ //FIXME try list_count
2954 for(i=0; i<h->ref_count[list]; i++){
2955 Picture *frame = &h->ref_list[list][i];
2956 Picture *field = &h->ref_list[list][16+2*i];
/* top field: same data pointers, doubled stride */
2959 field[0].linesize[j] <<= 1;
2960 field[0].reference = PICT_TOP_FIELD;
2961 field[0].poc= field[0].field_poc[0];
/* bottom field: copy of top, data shifted down one (frame) line */
2962 field[1] = field[0];
2964 field[1].data[j] += frame->linesize[j];
2965 field[1].reference = PICT_BOTTOM_FIELD;
2966 field[1].poc= field[1].field_poc[1];
2968 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2969 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2971 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2972 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2976 for(j=0; j<h->ref_count[1]; j++){
2977 for(i=0; i<h->ref_count[0]; i++)
2978 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2979 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2980 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the explicit weighted-prediction table (pred_weight_table) from
 * the slice header: per-list, per-reference luma and chroma weights and
 * offsets. Absent entries fall back to the defaults (1<<denom weight,
 * 0 offset). h->use_weight / use_weight_chroma record whether any
 * non-default weight was seen. */
2984 static int pred_weight_table(H264Context *h){
2985 MpegEncContext * const s = &h->s;
2987 int luma_def, chroma_def;
2990 h->use_weight_chroma= 0;
2991 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2992 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2993 luma_def = 1<<h->luma_log2_weight_denom;
2994 chroma_def = 1<<h->chroma_log2_weight_denom;
2996 for(list=0; list<2; list++){
2997 for(i=0; i<h->ref_count[list]; i++){
2998 int luma_weight_flag, chroma_weight_flag;
3000 luma_weight_flag= get_bits1(&s->gb);
3001 if(luma_weight_flag){
3002 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3003 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only flag use_weight when the value differs from the default (set elided) */
3004 if( h->luma_weight[list][i] != luma_def
3005 || h->luma_offset[list][i] != 0)
3008 h->luma_weight[list][i]= luma_def;
3009 h->luma_offset[list][i]= 0;
3013 chroma_weight_flag= get_bits1(&s->gb);
3014 if(chroma_weight_flag){
/* one weight/offset pair per chroma plane (inner j loop header elided) */
3017 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3018 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3019 if( h->chroma_weight[list][i][j] != chroma_def
3020 || h->chroma_offset[list][i][j] != 0)
3021 h->use_weight_chroma= 1;
3026 h->chroma_weight[list][i][j]= chroma_def;
3027 h->chroma_offset[list][i][j]= 0;
/* list 1 only exists for B slices */
3032 if(h->slice_type_nos != FF_B_TYPE) break;
3034 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute the implicit (POC-distance based) bi-prediction weight table for
 * B slices, per H.264 8.4.2.3.2: for each (ref0, ref1) pair derive
 * dist_scale_factor from tb/td and clamp the resulting weight back to the
 * default 32 when it falls outside [-64, 128].
 * NOTE(review): fragment — the early-return/else structure around the
 * "both refs equidistant" special case is partially missing.
 */
3038 static void implicit_weight_table(H264Context *h){
3039 MpegEncContext * const s = &h->s;
3041 int cur_poc = s->current_picture_ptr->poc;
/* single equidistant ref pair: implicit weighting is a no-op */
3043 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3044 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3046 h->use_weight_chroma= 0;
3051 h->use_weight_chroma= 2;
3052 h->luma_log2_weight_denom= 5;
3053 h->chroma_log2_weight_denom= 5;
3055 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3056 int poc0 = h->ref_list[0][ref0].poc;
3057 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3058 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb clipped to [-128,127] as mandated by the spec */
3059 int td = av_clip(poc1 - poc0, -128, 127);
3061 int tb = av_clip(cur_poc - poc0, -128, 127);
3062 int tx = (16384 + (FFABS(td) >> 1)) / td;
3063 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors fall back to the default weight 32 */
3064 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3065 h->implicit_weight[ref0][ref1] = 32;
3067 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3069 h->implicit_weight[ref0][ref1] = 32;
3075 * Mark a picture as no longer needed for reference. The refmask
3076 * argument allows unreferencing of individual fields or the whole frame.
3077 * If the picture becomes entirely unreferenced, but is being held for
3078 * display purposes, it is marked as such.
3079 * @param refmask mask of fields to unreference; the mask is bitwise
3080 * anded with the reference marking of pic
3081 * @return non-zero if pic becomes entirely unreferenced (except possibly
3082 * for display purposes) zero if one of the fields remains in
3085 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* a field may still be referenced after masking; then the pic stays live */
3087 if (pic->reference &= refmask) {
/* fully unreferenced: if still queued for output, keep it alive with
 * the special DELAYED_PIC_REF marking instead of dropping it */
3090 for(i = 0; h->delayed_pic[i]; i++)
3091 if(pic == h->delayed_pic[i]){
3092 pic->reference=DELAYED_PIC_REF;
3100 * instantaneous decoder refresh.
/* IDR: drop all long-term and short-term references and reset the
 * frame_num prediction state, as required at an IDR access unit. */
3102 static void idr(H264Context *h){
3105 for(i=0; i<16; i++){
3106 remove_long(h, i, 0);
3108 assert(h->long_ref_count==0);
3110 for(i=0; i<h->short_ref_count; i++){
3111 unreference_pic(h, h->short_ref[i], 0);
3112 h->short_ref[i]= NULL;
3114 h->short_ref_count=0;
3115 h->prev_frame_num= 0;
3116 h->prev_frame_num_offset= 0;
3121 /* forget old pics after a seek */
/* Drop all delayed-output pictures and the current picture's reference
 * marking, reset field state, then flush the underlying MPEG context. */
3122 static void flush_dpb(AVCodecContext *avctx){
3123 H264Context *h= avctx->priv_data;
3125 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3126 if(h->delayed_pic[i])
3127 h->delayed_pic[i]->reference= 0;
3128 h->delayed_pic[i]= NULL;
/* INT_MIN: "nothing output yet" sentinel for the output-POC tracker */
3130 h->outputed_poc= INT_MIN;
3132 if(h->s.current_picture_ptr)
3133 h->s.current_picture_ptr->reference= 0;
3134 h->s.first_field= 0;
3135 ff_mpeg_flush(avctx);
3139 * Find a Picture in the short term reference list by frame number.
3140 * @param frame_num frame number to search for
3141 * @param idx the index into h->short_ref where returned picture is found
3142 * undefined if no picture found.
3143 * @return pointer to the found picture, or NULL if no pic with the provided
3144 * frame number is found
3146 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3147 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is small (bounded by the DPB size) */
3150 for(i=0; i<h->short_ref_count; i++){
3151 Picture *pic= h->short_ref[i];
3152 if(s->avctx->debug&FF_DEBUG_MMCO)
3153 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3154 if(pic->frame_num == frame_num) {
3163 * Remove a picture from the short term reference list by its index in
3164 * that list. This does no checking on the provided index; it is assumed
3165 * to be valid. Other list entries are shifted down.
3166 * @param i index into h->short_ref of picture to remove.
3168 static void remove_short_at_index(H264Context *h, int i){
3169 assert(i >= 0 && i < h->short_ref_count);
3170 h->short_ref[i]= NULL;
/* close the gap by shifting the remaining entries down by one */
3171 if (--h->short_ref_count)
3172 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Remove a picture from the short-term list by frame number: find it,
 * unreference the fields selected by ref_mask, and drop the list entry
 * only if the picture became entirely unreferenced. */
3177 * @return the removed picture or NULL if an error occurs
3179 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3180 MpegEncContext * const s = &h->s;
3184 if(s->avctx->debug&FF_DEBUG_MMCO)
3185 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3187 pic = find_short(h, frame_num, &i);
3189 if(unreference_pic(h, pic, ref_mask))
3190 remove_short_at_index(h, i);
3197 * Remove a picture from the long term reference list by its index in
3199 * @return the removed picture or NULL if an error occurs
3201 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3204 pic= h->long_ref[i];
/* only clear the slot when the picture is now entirely unreferenced */
3206 if(unreference_pic(h, pic, ref_mask)){
3207 assert(h->long_ref[i]->long_ref == 1);
3208 h->long_ref[i]->long_ref= 0;
3209 h->long_ref[i]= NULL;
3210 h->long_ref_count--;
3218 * print short term list
/* Debug-only dump of the short-term reference list (FF_DEBUG_MMCO). */
3220 static void print_short_term(H264Context *h) {
3222 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3223 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3224 for(i=0; i<h->short_ref_count; i++){
3225 Picture *pic= h->short_ref[i];
3226 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3232 * print long term list
/* Debug-only dump of all 16 long-term reference slots (FF_DEBUG_MMCO). */
3234 static void print_long_term(H264Context *h) {
3236 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3237 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3238 for(i = 0; i < 16; i++){
3239 Picture *pic= h->long_ref[i];
3241 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3248 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO list (H.264 8.2.5): unreference short/long
 * pictures, move short->long, set max long index, full reset, mark the
 * current picture long-term; then handles second-field pairing and
 * enforces sps.ref_frame_count by discarding excess references.
 * NOTE(review): fragment — several case labels, breaks and braces from
 * the original switch are missing from this view. */
3250 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3251 MpegEncContext * const s = &h->s;
3253 int current_ref_assigned=0;
3256 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3257 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3259 for(i=0; i<mmco_count; i++){
3260 int structure, frame_num;
3261 if(s->avctx->debug&FF_DEBUG_MMCO)
3262 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* resolve the short-term target once for both SHORT2* opcodes */
3264 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3265 || mmco[i].opcode == MMCO_SHORT2LONG){
3266 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3267 pic = find_short(h, frame_num, &j);
3269 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3270 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3271 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3276 switch(mmco[i].opcode){
3277 case MMCO_SHORT2UNUSED:
3278 if(s->avctx->debug&FF_DEBUG_MMCO)
3279 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3280 remove_short(h, frame_num, structure ^ PICT_FRAME);
3282 case MMCO_SHORT2LONG:
/* evict any different picture currently occupying the long slot */
3283 if (h->long_ref[mmco[i].long_arg] != pic)
3284 remove_long(h, mmco[i].long_arg, 0);
3286 remove_short_at_index(h, j);
3287 h->long_ref[ mmco[i].long_arg ]= pic;
3288 if (h->long_ref[ mmco[i].long_arg ]){
3289 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3290 h->long_ref_count++;
3293 case MMCO_LONG2UNUSED:
3294 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3295 pic = h->long_ref[j];
3297 remove_long(h, j, structure ^ PICT_FRAME);
3298 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3299 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3302 // Comment below left from previous code as it is an interresting note.
3303 /* First field in pair is in short term list or
3304 * at a different long term index.
3305 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3306 * Report the problem and keep the pair where it is,
3307 * and mark this field valid.
/* MMCO_LONG: mark the current picture long-term at long_arg */
3310 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3311 remove_long(h, mmco[i].long_arg, 0);
3313 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3314 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3315 h->long_ref_count++;
3318 s->current_picture_ptr->reference |= s->picture_structure;
3319 current_ref_assigned=1;
3321 case MMCO_SET_MAX_LONG:
3322 assert(mmco[i].long_arg <= 16);
3323 // just remove the long term which index is greater than new max
3324 for(j = mmco[i].long_arg; j<16; j++){
3325 remove_long(h, j, 0);
/* MMCO_RESET: drop every reference and restart POC/frame_num at 0 */
3329 while(h->short_ref_count){
3330 remove_short(h, h->short_ref[0]->frame_num, 0);
3332 for(j = 0; j < 16; j++) {
3333 remove_long(h, j, 0);
3335 s->current_picture_ptr->poc=
3336 s->current_picture_ptr->field_poc[0]=
3337 s->current_picture_ptr->field_poc[1]=
3341 s->current_picture_ptr->frame_num= 0;
3347 if (!current_ref_assigned) {
3348 /* Second field of complementary field pair; the first field of
3349 * which is already referenced. If short referenced, it
3350 * should be first entry in short_ref. If not, it must exist
3351 * in long_ref; trying to put it on the short list here is an
3352 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3354 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3355 /* Just mark the second field valid */
3356 s->current_picture_ptr->reference = PICT_FRAME;
3357 } else if (s->current_picture_ptr->long_ref) {
3358 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3359 "assignment for second field "
3360 "in complementary field pair "
3361 "(first field is long term)\n");
3363 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3365 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* insert the current picture at the head of the short-term list */
3368 if(h->short_ref_count)
3369 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3371 h->short_ref[0]= s->current_picture_ptr;
3372 h->short_ref_count++;
3373 s->current_picture_ptr->reference |= s->picture_structure;
3377 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3379 /* We have too many reference frames, probably due to corrupted
3380 * stream. Need to discard one frame. Prevents overrun of the
3381 * short_ref and long_ref buffers.
3383 av_log(h->s.avctx, AV_LOG_ERROR,
3384 "number of reference frames exceeds max (probably "
3385 "corrupt input), discarding one\n");
3387 if (h->long_ref_count && !h->short_ref_count) {
3388 for (i = 0; i < 16; ++i)
3393 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3395 pic = h->short_ref[h->short_ref_count - 1];
3396 remove_short(h, pic->frame_num, 0);
3400 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[]:
 * IDR handling, then (if adaptive marking is signalled) up to
 * MAX_MMCO_COUNT opcodes with range-checked arguments; otherwise a
 * sliding-window removal of the oldest short-term reference is
 * synthesized when the DPB is full.
 * NOTE(review): fragment — several lines (else branches, loop closes,
 * mmco_index bookkeeping) are missing from this view.
 */
3405 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3406 MpegEncContext * const s = &h->s;
3410 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3411 s->broken_link= get_bits1(gb) -1;
3413 h->mmco[0].opcode= MMCO_LONG;
3414 h->mmco[0].long_arg= 0;
3418 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3419 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3420 MMCOOpcode opcode= get_ue_golomb_31(gb);
3422 h->mmco[i].opcode= opcode;
3423 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num */
3424 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3425 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3426 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3430 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3431 unsigned int long_arg= get_ue_golomb_31(gb);
3432 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3433 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3436 h->mmco[i].long_arg= long_arg;
3439 if(opcode > (unsigned)MMCO_LONG){
3440 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3443 if(opcode == MMCO_END)
3448 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: synthesize a SHORT2UNUSED for the oldest short ref,
 * unless this is the 2nd field of a referenced pair */
3450 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3451 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3452 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3453 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3455 if (FIELD_PICTURE) {
/* field coding: remove both fields of that frame */
3456 h->mmco[0].short_pic_num *= 2;
3457 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3458 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count(s) for the current picture per
 * H.264 8.2.1, handling all three poc_type modes:
 *  type 0 — msb/lsb wraparound prediction,
 *  type 1 — expected POC from the SPS ref-frame offset cycle,
 *  type 2 — POC derived directly from frame_num.
 * Stores field_poc[0/1] on the current picture (respecting field coding)
 * and cur->poc as the minimum of the two.
 * NOTE(review): fragment — some else branches and assignments are missing.
 */
3468 static int init_poc(H264Context *h){
3469 MpegEncContext * const s = &h->s;
3470 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3472 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture -> advance the offset */
3474 h->frame_num_offset= h->prev_frame_num_offset;
3475 if(h->frame_num < h->prev_frame_num)
3476 h->frame_num_offset += max_frame_num;
3478 if(h->sps.poc_type==0){
3479 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect lsb wraparound in either direction to pick the right msb */
3481 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3482 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3483 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3484 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3486 h->poc_msb = h->prev_poc_msb;
3487 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3489 field_poc[1] = h->poc_msb + h->poc_lsb;
3490 if(s->picture_structure == PICT_FRAME)
3491 field_poc[1] += h->delta_poc_bottom;
3492 }else if(h->sps.poc_type==1){
3493 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3496 if(h->sps.poc_cycle_length != 0)
3497 abs_frame_num = h->frame_num_offset + h->frame_num;
3501 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3504 expected_delta_per_poc_cycle = 0;
3505 for(i=0; i < h->sps.poc_cycle_length; i++)
3506 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3508 if(abs_frame_num > 0){
3509 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3510 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3512 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3513 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3514 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3518 if(h->nal_ref_idc == 0)
3519 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3521 field_poc[0] = expectedpoc + h->delta_poc[0];
3522 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3524 if(s->picture_structure == PICT_FRAME)
3525 field_poc[1] += h->delta_poc[1];
3527 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of fields actually present in this picture */
3536 if(s->picture_structure != PICT_BOTTOM_FIELD)
3537 s->current_picture_ptr->field_poc[0]= field_poc[0];
3538 if(s->picture_structure != PICT_TOP_FIELD)
3539 s->current_picture_ptr->field_poc[1]= field_poc[1];
3540 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3547 * initialize scan tables
/* Build the 4x4 and 8x8 (zigzag/field, plus CAVLC variants) scan tables.
 * When the DSP uses the C IDCT, the tables are copied verbatim; otherwise
 * the T() permutations adapt them to the optimized IDCT's coefficient
 * layout. The *_q0 pointers select untransposed tables for the
 * transform-bypass (lossless) path. */
3549 static void init_scan_tables(H264Context *h){
3550 MpegEncContext * const s = &h->s;
3552 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3553 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3554 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3556 for(i=0; i<16; i++){
3557 #define T(x) (x>>2) | ((x<<2) & 0xF)
3558 h->zigzag_scan[i] = T(zigzag_scan[i]);
3559 h-> field_scan[i] = T( field_scan[i]);
3563 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3564 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3565 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3566 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3567 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3569 for(i=0; i<64; i++){
3570 #define T(x) (x>>3) | ((x&7)<<3)
3571 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3572 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3573 h->field_scan8x8[i] = T(field_scan8x8[i]);
3574 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3578 if(h->sps.transform_bypass){ //FIXME same ugly
3579 h->zigzag_scan_q0 = zigzag_scan;
3580 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3581 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3582 h->field_scan_q0 = field_scan;
3583 h->field_scan8x8_q0 = field_scan8x8;
3584 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3586 h->zigzag_scan_q0 = h->zigzag_scan;
3587 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3588 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3589 h->field_scan_q0 = h->field_scan;
3590 h->field_scan8x8_q0 = h->field_scan8x8;
3591 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3596 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-frame state a slice thread needs: current picture
 * pointers, strides, POC/frame_num prediction state, the reference lists
 * and the dequant tables. Assignments are plain copies — the thread
 * contexts share the underlying Picture data with the master. */
3598 static void clone_slice(H264Context *dst, H264Context *src)
3600 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3601 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3602 dst->s.current_picture = src->s.current_picture;
3603 dst->s.linesize = src->s.linesize;
3604 dst->s.uvlinesize = src->s.uvlinesize;
3605 dst->s.first_field = src->s.first_field;
3607 dst->prev_poc_msb = src->prev_poc_msb;
3608 dst->prev_poc_lsb = src->prev_poc_lsb;
3609 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3610 dst->prev_frame_num = src->prev_frame_num;
3611 dst->short_ref_count = src->short_ref_count;
3613 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3614 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3615 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3616 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3618 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3619 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3623 * decodes a slice header.
3624 * This will also call MPV_common_init() and frame_start() as needed.
3626 * @param h h264context
3627 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3629 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): this fragment is missing many original lines (else
 * branches, error returns, closing braces); read it as an outline of the
 * slice-header parse, not as compilable code. */
3631 static int decode_slice_header(H264Context *h, H264Context *h0){
3632 MpegEncContext * const s = &h->s;
3633 MpegEncContext * const s0 = &h0->s;
3634 unsigned int first_mb_in_slice;
3635 unsigned int pps_id;
3636 int num_ref_idx_active_override_flag;
3637 unsigned int slice_type, tmp, i, j;
3638 int default_ref_list_done = 0;
3639 int last_pic_structure;
/* a slice with nal_ref_idc==0 can be dropped without breaking references */
3641 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: cheaper 2-tap qpel for non-reference slices */
3643 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3644 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3645 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3647 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3648 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3651 first_mb_in_slice= get_ue_golomb(&s->gb);
3653 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3654 h0->current_slice = 0;
3655 if (!s0->first_field)
3656 s->current_picture_ptr= NULL;
3659 slice_type= get_ue_golomb_31(&s->gb);
3661 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "same type for the whole picture" (fixed) */
3666 h->slice_type_fixed=1;
3668 h->slice_type_fixed=0;
3670 slice_type= golomb_to_pict_type[ slice_type ];
3671 if (slice_type == FF_I_TYPE
3672 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3673 default_ref_list_done = 1;
3675 h->slice_type= slice_type;
3676 h->slice_type_nos= slice_type & 3;
3678 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3679 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3680 av_log(h->s.avctx, AV_LOG_ERROR,
3681 "B picture before any references, skipping\n");
/* PPS/SPS activation */
3685 pps_id= get_ue_golomb(&s->gb);
3686 if(pps_id>=MAX_PPS_COUNT){
3687 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3690 if(!h0->pps_buffers[pps_id]) {
3691 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3694 h->pps= *h0->pps_buffers[pps_id];
3696 if(!h0->sps_buffers[h->pps.sps_id]) {
3697 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3700 h->sps = *h0->sps_buffers[h->pps.sps_id];
3702 if(h == h0 && h->dequant_coeff_pps != pps_id){
3703 h->dequant_coeff_pps = pps_id;
3704 init_dequant_tables(h);
/* derive picture dimensions from the active SPS (with cropping) */
3707 s->mb_width= h->sps.mb_width;
3708 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3710 h->b_stride= s->mb_width*4;
3711 h->b8_stride= s->mb_width*2;
3713 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3714 if(h->sps.frame_mbs_only_flag)
3715 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3717 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3719 if (s->context_initialized
3720 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3722 return -1; // width / height changed during parallelized decoding
3724 flush_dpb(s->avctx);
3727 if (!s->context_initialized) {
3729 return -1; // we cant (re-)initialize context during parallel decoding
3730 if (MPV_common_init(s) < 0)
3734 init_scan_tables(h);
/* set up per-thread slave contexts (slice-threaded decoding) */
3737 for(i = 1; i < s->avctx->thread_count; i++) {
3739 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3740 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3741 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3744 init_scan_tables(c);
3748 for(i = 0; i < s->avctx->thread_count; i++)
3749 if(context_init(h->thread_context[i]) < 0)
3752 s->avctx->width = s->width;
3753 s->avctx->height = s->height;
3754 s->avctx->sample_aspect_ratio= h->sps.sar;
3755 if(!s->avctx->sample_aspect_ratio.den)
3756 s->avctx->sample_aspect_ratio.den = 1;
3758 if(h->sps.timing_info_present_flag){
3759 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* workaround: old x264 builds wrote half the correct time_scale */
3760 if(h->x264_build > 0 && h->x264_build < 44)
3761 s->avctx->time_base.den *= 2;
3762 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3763 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3767 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3770 h->mb_aff_frame = 0;
3771 last_pic_structure = s0->picture_structure;
3772 if(h->sps.frame_mbs_only_flag){
3773 s->picture_structure= PICT_FRAME;
3775 if(get_bits1(&s->gb)) { //field_pic_flag
3776 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3778 s->picture_structure= PICT_FRAME;
3779 h->mb_aff_frame = h->sps.mb_aff;
3782 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3784 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating dummy reference frames */
3785 while(h->frame_num != h->prev_frame_num &&
3786 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3787 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3789 h->prev_frame_num++;
3790 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3791 s->current_picture_ptr->frame_num= h->prev_frame_num;
3792 execute_ref_pic_marking(h, NULL, 0);
3795 /* See if we have a decoded first field looking for a pair... */
3796 if (s0->first_field) {
3797 assert(s0->current_picture_ptr);
3798 assert(s0->current_picture_ptr->data[0]);
3799 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3801 /* figure out if we have a complementary field pair */
3802 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3804 * Previous field is unmatched. Don't display it, but let it
3805 * remain for reference if marked as such.
3807 s0->current_picture_ptr = NULL;
3808 s0->first_field = FIELD_PICTURE;
3811 if (h->nal_ref_idc &&
3812 s0->current_picture_ptr->reference &&
3813 s0->current_picture_ptr->frame_num != h->frame_num) {
3815 * This and previous field were reference, but had
3816 * different frame_nums. Consider this field first in
3817 * pair. Throw away previous field except for reference
3820 s0->first_field = 1;
3821 s0->current_picture_ptr = NULL;
3824 /* Second field in complementary pair */
3825 s0->first_field = 0;
3830 /* Frame or first field in a potentially complementary pair */
3831 assert(!s0->current_picture_ptr);
3832 s0->first_field = FIELD_PICTURE;
3835 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3836 s0->first_field = 0;
3843 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3845 assert(s->mb_num == s->mb_width * s->mb_height);
3846 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3847 first_mb_in_slice >= s->mb_num){
3848 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3851 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3852 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3853 if (s->picture_structure == PICT_BOTTOM_FIELD)
3854 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3855 assert(s->mb_y < s->mb_height);
/* pic num space doubles for field pictures (each field counted) */
3857 if(s->picture_structure==PICT_FRAME){
3858 h->curr_pic_num= h->frame_num;
3859 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3861 h->curr_pic_num= 2*h->frame_num + 1;
3862 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3865 if(h->nal_unit_type == NAL_IDR_SLICE){
3866 get_ue_golomb(&s->gb); /* idr_pic_id */
3869 if(h->sps.poc_type==0){
3870 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3872 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3873 h->delta_poc_bottom= get_se_golomb(&s->gb);
3877 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3878 h->delta_poc[0]= get_se_golomb(&s->gb);
3880 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3881 h->delta_poc[1]= get_se_golomb(&s->gb);
3886 if(h->pps.redundant_pic_cnt_present){
3887 h->redundant_pic_count= get_ue_golomb(&s->gb);
3890 //set defaults, might be overridden a few lines later
3891 h->ref_count[0]= h->pps.ref_count[0];
3892 h->ref_count[1]= h->pps.ref_count[1];
3894 if(h->slice_type_nos != FF_I_TYPE){
3895 if(h->slice_type_nos == FF_B_TYPE){
3896 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3898 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3900 if(num_ref_idx_active_override_flag){
3901 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3902 if(h->slice_type_nos==FF_B_TYPE)
3903 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned trick: catches both 0 and >32 after the -1 */
3905 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3906 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3907 h->ref_count[0]= h->ref_count[1]= 1;
3911 if(h->slice_type_nos == FF_B_TYPE)
3918 if(!default_ref_list_done){
3919 fill_default_ref_list(h);
3922 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3925 if(h->slice_type_nos!=FF_I_TYPE){
3926 s->last_picture_ptr= &h->ref_list[0][0];
3927 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3929 if(h->slice_type_nos==FF_B_TYPE){
3930 s->next_picture_ptr= &h->ref_list[1][0];
3931 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* choose explicit vs implicit weighted prediction from the PPS */
3934 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3935 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3936 pred_weight_table(h);
3937 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3938 implicit_weight_table(h);
3943 decode_ref_pic_marking(h0, &s->gb);
3946 fill_mbaff_ref_list(h);
3948 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3949 direct_dist_scale_factor(h);
3950 direct_ref_list_init(h);
3952 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3953 tmp = get_ue_golomb_31(&s->gb);
3955 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3958 h->cabac_init_idc= tmp;
3961 h->last_qscale_diff = 0;
3962 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3964 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3968 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3969 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3970 //FIXME qscale / qp ... stuff
3971 if(h->slice_type == FF_SP_TYPE){
3972 get_bits1(&s->gb); /* sp_for_switch_flag */
3974 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3975 get_se_golomb(&s->gb); /* slice_qs_delta */
3978 h->deblocking_filter = 1;
3979 h->slice_alpha_c0_offset = 0;
3980 h->slice_beta_offset = 0;
3981 if( h->pps.deblocking_filter_parameters_present ) {
3982 tmp= get_ue_golomb_31(&s->gb);
3984 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream idc 0/1 is inverted relative to the internal flag */
3987 h->deblocking_filter= tmp;
3988 if(h->deblocking_filter < 2)
3989 h->deblocking_filter^= 1; // 1<->0
3991 if( h->deblocking_filter ) {
3992 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3993 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3997 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3998 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3999 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4000 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4001 h->deblocking_filter= 0;
4003 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4004 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4005 /* Cheat slightly for speed:
4006 Do not bother to deblock across slices. */
4007 h->deblocking_filter = 2;
4009 h0->max_contexts = 1;
4010 if(!h0->single_decode_warning) {
4011 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4012 h0->single_decode_warning = 1;
4015 return 1; // deblocking switched inside frame
4020 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4021 slice_group_change_cycle= get_bits(&s->gb, ?);
4024 h0->last_slice_type = slice_type;
4025 h->slice_num = ++h0->current_slice;
4026 if(h->slice_num >= MAX_SLICES){
4027 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* build ref->frame lookup table used by the loop filter */
4031 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4035 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4036 +(h->ref_list[j][i].reference&3);
4039 for(i=16; i<48; i++)
4040 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4041 +(h->ref_list[j][i].reference&3);
4044 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4045 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4047 s->avctx->refs= h->sps.ref_frame_count;
4049 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4050 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4052 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4054 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4055 pps_id, h->frame_num,
4056 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4057 h->ref_count[0], h->ref_count[1],
4059 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4061 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4062 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read a CAVLC level_prefix: count leading zero bits before the first 1,
 * using the raw bit-reader cache for speed. The debug branch prints the
 * raw bits when tracing is enabled. */
4072 static inline int get_level_prefix(GetBitContext *gb){
4076 OPEN_READER(re, gb);
4077 UPDATE_CACHE(re, gb);
4078 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB side of the cache */
4080 log= 32 - av_log2(buf);
4082 print_bin(buf>>(32-log), log);
4083 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4086 LAST_SKIP_BITS(re, gb, log);
4087 CLOSE_READER(re, gb);
/* Whether the 8x8 transform may be used for the current macroblock:
 * no sub-8x8 partitions in any of the four sub_mb_types (DIRECT sub-mbs
 * are additionally forbidden unless direct_8x8_inference is set).
 * The four 16-bit sub_mb_type entries are tested at once as a uint64_t. */
4092 static inline int get_dct8x8_allowed(H264Context *h){
4093 if(h->sps.direct_8x8_inference_flag)
4094 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4096 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4100 * decodes a residual block.
4101 * @param n block index
4102 * @param scantable scantable
4103 * @param max_coeff number of coefficients in the block
4104 * @return <0 if an error occurred
// CAVLC residual block decoder: reads coeff_token, trailing-one signs,
// the remaining levels, total_zeros and run_before, then scatters the
// levels into 'block' in scan order, optionally dequantizing via 'qmul'.
4106 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4107 MpegEncContext * const s = &h->s;
// coeff_token uses one of 4 VLC tables selected by the predicted
// non-zero count of the neighbouring blocks (nC in the spec).
4108 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4110 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4112 //FIXME put trailing_onex into the context
// Chroma DC blocks use their own dedicated coeff_token VLC.
4114 if(n == CHROMA_DC_BLOCK_INDEX){
4115 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4116 total_coeff= coeff_token>>2;
4118 if(n == LUMA_DC_BLOCK_INDEX){
// For luma DC the neighbour prediction uses block index 0.
4119 total_coeff= pred_non_zero_count(h, 0);
4120 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4121 total_coeff= coeff_token>>2;
// Regular 4x4 luma/chroma AC block: predict nC from neighbours,
// decode coeff_token and store the coefficient count in the cache.
4123 total_coeff= pred_non_zero_count(h, n);
4124 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4125 total_coeff= coeff_token>>2;
4126 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4130 //FIXME set last_non_zero?
// coeff_token is packed as (total_coeff<<2)|trailing_ones, so this
// bound check catches corrupted streams before any table access.
4134 if(total_coeff > (unsigned)max_coeff) {
4135 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4139 trailing_ones= coeff_token&3;
4140 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4141 assert(total_coeff<=16);
// Trailing ones: read up to 3 sign bits at once; each decoded as +/-1.
// Only the first 'trailing_ones' entries of level[] are actually valid.
4143 i = show_bits(gb, 3);
4144 skip_bits(gb, trailing_ones);
4145 level[0] = 1-((i&4)>>1);
4146 level[1] = 1-((i&2) );
4147 level[2] = 1-((i&1)<<1);
4149 if(trailing_ones<total_coeff) {
// Initial suffix_length per the spec: 1 iff more than 10 coefficients
// and fewer than 3 trailing ones, else 0.
4151 int suffix_length = total_coeff > 10 && trailing_ones < 3;
// Fast path: look up prefix+suffix in the precomputed level table;
// values >= 100 signal "escape, prefix = value-100" and may need the
// slow unary get_level_prefix() continuation.
4152 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4153 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4155 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4156 if(level_code >= 100){
4157 prefix= level_code - 100;
4158 if(prefix == LEVEL_TAB_BITS)
4159 prefix += get_level_prefix(gb);
4161 //first coefficient has suffix_length equal to 0 or 1
4162 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4164 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4166 level_code= (prefix<<suffix_length); //part
4167 }else if(prefix==14){
4169 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4171 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape coding with a (prefix-3)-bit suffix.
4173 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4174 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4176 level_code += (1<<(prefix-3))-4096;
// With fewer than 3 trailing ones, levels +/-1 are impossible here,
// so the code space is shifted by 2.
4179 if(trailing_ones < 3) level_code += 2;
// Map the unsigned code to a signed level: even -> positive,
// odd -> negative (branch-free via the sign mask).
4182 mask= -(level_code&1);
4183 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4185 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// Adapt suffix_length for the next coefficient based on magnitude.
4188 if(level_code + 3U > 6U)
4190 level[trailing_ones]= level_code;
4193 //remaining coefficients have suffix_length > 0
4194 for(i=trailing_ones+1;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented after a level.
4195 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4196 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4197 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4199 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4200 if(level_code >= 100){
4201 prefix= level_code - 100;
4202 if(prefix == LEVEL_TAB_BITS){
4203 prefix += get_level_prefix(gb);
4206 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4208 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4210 level_code += (1<<(prefix-3))-4096;
// Same branch-free unsigned->signed mapping as above.
4212 mask= -(level_code&1);
4213 level_code= (((2+level_code)>>1) ^ mask) - mask;
4215 level[i]= level_code;
4217 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// total_zeros is only coded when the block is not already full.
4222 if(total_coeff == max_coeff)
4225 if(n == CHROMA_DC_BLOCK_INDEX)
4226 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4228 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Place coefficients back-to-front: the last non-zero coefficient sits
// at scan position zeros_left + total_coeff - 1.
4231 coeff_num = zeros_left + total_coeff - 1;
4232 j = scantable[coeff_num];
// Path without dequantization (qmul == NULL, e.g. DC blocks).
4234 block[j] = level[0];
4235 for(i=1;i<total_coeff;i++) {
4238 else if(zeros_left < 7){
4239 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4241 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4243 zeros_left -= run_before;
4244 coeff_num -= 1 + run_before;
4245 j= scantable[ coeff_num ];
// Path with dequantization: level * qmul[j], rounded, >>6.
4250 block[j] = (level[0] * qmul[j] + 32)>>6;
4251 for(i=1;i<total_coeff;i++) {
4254 else if(zeros_left < 7){
4255 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4257 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4259 zeros_left -= run_before;
4260 coeff_num -= 1 + run_before;
4261 j= scantable[ coeff_num ];
4263 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the run_before codes overran the block.
4268 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
// For a skipped macroblock pair in MBAFF mode, infer the field/frame
// decoding flag from a neighbour instead of reading it from the stream.
4275 static void predict_field_decoding_flag(H264Context *h){
4276 MpegEncContext * const s = &h->s;
4277 const int mb_xy= h->mb_xy;
// Prefer the left neighbour when it belongs to the same slice,
// otherwise fall back to the top neighbour.
4278 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4279 ? s->current_picture.mb_type[mb_xy-1]
4280 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4281 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4283 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4287 * decodes a P_SKIP or B_SKIP macroblock
4289 static void decode_mb_skip(H264Context *h){
4290 MpegEncContext * const s = &h->s;
4291 const int mb_xy= h->mb_xy;
// A skipped macroblock carries no residual: clear the per-MB and cached
// non-zero coefficient counts.
4294 memset(h->non_zero_count[mb_xy], 0, 16);
4295 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4298 mb_type|= MB_TYPE_INTERLACED;
// B_SKIP: motion is inherited via direct prediction.
4300 if( h->slice_type_nos == FF_B_TYPE )
4302 // just for fill_caches. pred_direct_motion will set the real mb_type
4303 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4305 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4306 pred_direct_motion(h, &mb_type);
4307 mb_type|= MB_TYPE_SKIP;
// P_SKIP: a single 16x16 partition, list 0, reference index 0, with the
// motion vector taken from the P-skip predictor.
4312 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4314 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4315 pred_pskip_motion(h, &mx, &my);
4316 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4317 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit motion and per-MB bookkeeping for later prediction/deblocking.
4320 write_back_motion(h, mb_type);
4321 s->current_picture.mb_type[mb_xy]= mb_type;
4322 s->current_picture.qscale_table[mb_xy]= s->qscale;
4323 h->slice_table[ mb_xy ]= h->slice_num;
4324 h->prev_mb_skipped= 1;
4328 * decodes a macroblock
4329 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// Decodes one macroblock of a CAVLC-coded slice: skip handling, mb_type,
// intra prediction modes or inter motion data, CBP, qp delta and residuals.
4331 static int decode_mb_cavlc(H264Context *h){
4332 MpegEncContext * const s = &h->s;
4334 int partition_count;
4335 unsigned int mb_type, cbp;
4336 int dct8x8_allowed= h->pps.transform_8x8_mode;
4338 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4340 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4341 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4343 if(h->slice_type_nos != FF_I_TYPE){
4344 if(s->mb_skip_run==-1)
4345 s->mb_skip_run= get_ue_golomb(&s->gb);
4347 if (s->mb_skip_run--) {
// In MBAFF the field flag of a skipped pair is either read at the end
// of the run or predicted from neighbours.
4348 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4349 if(s->mb_skip_run==0)
4350 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4352 predict_field_decoding_flag(h);
4359 if( (s->mb_y&1) == 0 )
4360 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4363 h->prev_mb_skipped= 0;
// --- mb_type: remapped through per-slice-type info tables ---
4365 mb_type= get_ue_golomb(&s->gb);
4366 if(h->slice_type_nos == FF_B_TYPE){
4368 partition_count= b_mb_type_info[mb_type].partition_count;
4369 mb_type= b_mb_type_info[mb_type].type;
// Values beyond the inter range fall through to the intra decoder.
4372 goto decode_intra_mb;
4374 }else if(h->slice_type_nos == FF_P_TYPE){
4376 partition_count= p_mb_type_info[mb_type].partition_count;
4377 mb_type= p_mb_type_info[mb_type].type;
4380 goto decode_intra_mb;
4383 assert(h->slice_type_nos == FF_I_TYPE);
4384 if(h->slice_type == FF_SI_TYPE && mb_type)
4388 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra mb_type also encodes CBP and the 16x16 prediction mode.
4392 cbp= i_mb_type_info[mb_type].cbp;
4393 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4394 mb_type= i_mb_type_info[mb_type].type;
4398 mb_type |= MB_TYPE_INTERLACED;
4400 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, byte-aligned ---
4402 if(IS_INTRA_PCM(mb_type)){
4405 // We assume these blocks are very rare so we do not optimize it.
4406 align_get_bits(&s->gb);
4408 // The pixels are stored in the same order as levels in h->mb array.
4409 for(x=0; x < (CHROMA ? 384 : 256); x++){
4410 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4413 // In deblocking, the quantizer is 0
4414 s->current_picture.qscale_table[mb_xy]= 0;
4415 // All coeffs are present
4416 memset(h->non_zero_count[mb_xy], 16, 16);
4418 s->current_picture.mb_type[mb_xy]= mb_type;
// In field decoding mode each list exposes twice the references
// (top/bottom fields); undone at the end of this function.
4423 h->ref_count[0] <<= 1;
4424 h->ref_count[1] <<= 1;
4427 fill_caches(h, mb_type, 0);
// --- Intra prediction mode parsing ---
4430 if(IS_INTRA(mb_type)){
4432 // init_top_left_availability(h);
4433 if(IS_INTRA4x4(mb_type)){
4436 if(dct8x8_allowed && get_bits1(&s->gb)){
4437 mb_type |= MB_TYPE_8x8DCT;
4441 // fill_intra4x4_pred_table(h);
4442 for(i=0; i<16; i+=di){
4443 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag == 0: read rem_intra4x4_pred_mode and
// skip over the predicted mode.
4445 if(!get_bits1(&s->gb)){
4446 const int rem_mode= get_bits(&s->gb, 3);
4447 mode = rem_mode + (rem_mode >= mode);
4451 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4453 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4455 write_back_intra_pred_mode(h);
4456 if( check_intra4x4_pred_mode(h) < 0)
4459 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4460 if(h->intra16x16_pred_mode < 0)
4464 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4467 h->chroma_pred_mode= pred_mode;
// --- Inter, 8x8 partitions: sub_mb_type per 8x8 block ---
4469 }else if(partition_count==4){
4470 int i, j, sub_partition_count[4], list, ref[2][4];
4472 if(h->slice_type_nos == FF_B_TYPE){
4474 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4475 if(h->sub_mb_type[i] >=13){
4476 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4479 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4480 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct sub-block triggers direct motion prediction; the corner
// cache entries are invalidated so the per-partition predictors do not
// reuse stale direct data.
4482 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4483 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4484 pred_direct_motion(h, &mb_type);
4485 h->ref_cache[0][scan8[4]] =
4486 h->ref_cache[1][scan8[4]] =
4487 h->ref_cache[0][scan8[12]] =
4488 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4491 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4493 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4494 if(h->sub_mb_type[i] >=4){
4495 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4498 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4499 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 block and list. A ref_count of 1 needs no
// bits; 2 is coded as a single flag; otherwise ue(v).
4503 for(list=0; list<h->list_count; list++){
4504 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4506 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4507 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4511 }else if(ref_count == 2){
4512 tmp= get_bits1(&s->gb)^1;
4514 tmp= get_ue_golomb_31(&s->gb);
4516 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4529 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition; results fan out into the
// mv_cache according to the sub-partition shape.
4531 for(list=0; list<h->list_count; list++){
4533 if(IS_DIRECT(h->sub_mb_type[i])) {
4534 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4537 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4538 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4540 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4541 const int sub_mb_type= h->sub_mb_type[i];
4542 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4543 for(j=0; j<sub_partition_count[i]; j++){
4545 const int index= 4*i + block_width*j;
4546 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4547 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4548 mx += get_se_golomb(&s->gb);
4549 my += get_se_golomb(&s->gb);
4550 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector over the cache cells this sub-partition covers.
4552 if(IS_SUB_8X8(sub_mb_type)){
4554 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4556 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4557 }else if(IS_SUB_8X4(sub_mb_type)){
4558 mv_cache[ 1 ][0]= mx;
4559 mv_cache[ 1 ][1]= my;
4560 }else if(IS_SUB_4X8(sub_mb_type)){
4561 mv_cache[ 8 ][0]= mx;
4562 mv_cache[ 8 ][1]= my;
4564 mv_cache[ 0 ][0]= mx;
4565 mv_cache[ 0 ][1]= my;
4568 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- Inter, direct 16x16 ---
4574 }else if(IS_DIRECT(mb_type)){
4575 pred_direct_motion(h, &mb_type);
4576 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- Inter, 16x16 / 16x8 / 8x16 partitions ---
4578 int list, mx, my, i;
4579 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4580 if(IS_16X16(mb_type)){
4581 for(list=0; list<h->list_count; list++){
4583 if(IS_DIR(mb_type, 0, list)){
4584 if(h->ref_count[list]==1){
4586 }else if(h->ref_count[list]==2){
4587 val= get_bits1(&s->gb)^1;
4589 val= get_ue_golomb_31(&s->gb);
4590 if(val >= h->ref_count[list]){
4591 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4596 val= LIST_NOT_USED&0xFF;
4597 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4599 for(list=0; list<h->list_count; list++){
4601 if(IS_DIR(mb_type, 0, list)){
4602 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4603 mx += get_se_golomb(&s->gb);
4604 my += get_se_golomb(&s->gb);
4605 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4607 val= pack16to32(mx,my);
4610 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4613 else if(IS_16X8(mb_type)){
4614 for(list=0; list<h->list_count; list++){
4617 if(IS_DIR(mb_type, i, list)){
4618 if(h->ref_count[list] == 1){
4620 }else if(h->ref_count[list] == 2){
4621 val= get_bits1(&s->gb)^1;
4623 val= get_ue_golomb_31(&s->gb);
4624 if(val >= h->ref_count[list]){
4625 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4630 val= LIST_NOT_USED&0xFF;
4631 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4634 for(list=0; list<h->list_count; list++){
4637 if(IS_DIR(mb_type, i, list)){
4638 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4639 mx += get_se_golomb(&s->gb);
4640 my += get_se_golomb(&s->gb);
4641 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4643 val= pack16to32(mx,my);
4646 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4650 assert(IS_8X16(mb_type));
4651 for(list=0; list<h->list_count; list++){
4654 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4655 if(h->ref_count[list]==1){
4657 }else if(h->ref_count[list]==2){
4658 val= get_bits1(&s->gb)^1;
4660 val= get_ue_golomb_31(&s->gb);
4661 if(val >= h->ref_count[list]){
4662 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4667 val= LIST_NOT_USED&0xFF;
4668 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4671 for(list=0; list<h->list_count; list++){
4674 if(IS_DIR(mb_type, i, list)){
4675 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4676 mx += get_se_golomb(&s->gb);
4677 my += get_se_golomb(&s->gb);
4678 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4680 val= pack16to32(mx,my);
4683 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4689 if(IS_INTER(mb_type))
4690 write_back_motion(h, mb_type);
// --- CBP (not coded for Intra16x16, which embeds it in mb_type) ---
4692 if(!IS_INTRA16x16(mb_type)){
4693 cbp= get_ue_golomb(&s->gb);
4695 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4700 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4701 else cbp= golomb_to_inter_cbp [cbp];
// Gray (luma-only) mapping tables for 4:0:0-style content.
4703 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4704 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag: only coded for inter MBs with luma residual.
4709 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4710 if(get_bits1(&s->gb)){
4711 mb_type |= MB_TYPE_8x8DCT;
4712 h->cbp_table[mb_xy]= cbp;
4715 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residual decoding ---
4717 if(cbp || IS_INTRA16x16(mb_type)){
4718 int i8x8, i4x4, chroma_idx;
4720 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4721 const uint8_t *scan, *scan8x8, *dc_scan;
4723 // fill_non_zero_count_cache(h);
// Scan table selection: field vs frame scans, and the qscale==0
// variants (which fold dequantization differences into the scan).
4725 if(IS_INTERLACED(mb_type)){
4726 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4727 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4728 dc_scan= luma_dc_field_scan;
4730 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4731 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4732 dc_scan= luma_dc_zigzag_scan;
// mb_qp_delta, wrapped into the 0..51 range per the spec.
4735 dquant= get_se_golomb(&s->gb);
4737 if( dquant > 25 || dquant < -26 ){
4738 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4742 s->qscale += dquant;
4743 if(((unsigned)s->qscale) > 51){
4744 if(s->qscale<0) s->qscale+= 52;
4745 else s->qscale-= 52;
4748 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4749 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra16x16: separate luma DC block, then 15-coefficient AC blocks.
4750 if(IS_INTRA16x16(mb_type)){
4751 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4752 return -1; //FIXME continue if partitioned and other return -1 too
4755 assert((cbp&15) == 0 || (cbp&15) == 15);
4758 for(i8x8=0; i8x8<4; i8x8++){
4759 for(i4x4=0; i4x4<4; i4x4++){
4760 const int index= i4x4 + 4*i8x8;
4761 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4767 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-Intra16x16: luma residual per coded 8x8 block, either as one
// 8x8 transform (decoded as four interleaved 4x4 scans) or four 4x4s.
4770 for(i8x8=0; i8x8<4; i8x8++){
4771 if(cbp & (1<<i8x8)){
4772 if(IS_8x8DCT(mb_type)){
4773 DCTELEM *buf = &h->mb[64*i8x8];
4775 for(i4x4=0; i4x4<4; i4x4++){
4776 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4777 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4780 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4781 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4783 for(i4x4=0; i4x4<4; i4x4++){
4784 const int index= i4x4 + 4*i8x8;
4786 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4792 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4793 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks first (cbp bit 4), then AC blocks (cbp bit 5).
4799 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4800 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4806 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4807 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4808 for(i4x4=0; i4x4<4; i4x4++){
4809 const int index= 16 + 4*chroma_idx + i4x4;
4810 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4816 uint8_t * const nnz= &h->non_zero_count_cache[0];
4817 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4818 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear the whole non-zero-count cache.
4821 uint8_t * const nnz= &h->non_zero_count_cache[0];
4822 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4823 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4824 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4826 s->current_picture.qscale_table[mb_xy]= s->qscale;
4827 write_back_non_zero_count(h);
// Undo the field-mode reference-count doubling from above.
4830 h->ref_count[0] >>= 1;
4831 h->ref_count[1] >>= 1;
// CABAC mb_field_decoding_flag: context is the number of neighbouring
// macroblock pairs (left, above) that are field-coded and in this slice.
4837 static int decode_cabac_field_decoding_flag(H264Context *h) {
4838 MpegEncContext * const s = &h->s;
4839 const int mb_x = s->mb_x;
// Address the top MB of the current pair (mb_y rounded down to even).
4840 const int mb_y = s->mb_y & ~1;
4841 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4842 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4844 unsigned int ctx = 0;
4846 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4849 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// Context offset 70 is the base for this syntax element.
4853 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
// CABAC intra mb_type: returns 0 for I_4x4, 1..24 for the I_16x16 variants
// (encoding cbp_luma/cbp_chroma/pred mode), 25 for I_PCM. intra_slice
// selects between the I-slice context set and the inter-slice suffix set.
4856 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4857 uint8_t *state= &h->cabac_state[ctx_base];
4861 MpegEncContext * const s = &h->s;
4862 const int mba_xy = h->left_mb_xy[0];
4863 const int mbb_xy = h->top_mb_xy;
// Context increment: count non-I4x4 neighbours within the same slice.
4865 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4867 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4869 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4870 return 0; /* I4x4 */
4873 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4874 return 0; /* I4x4 */
// The terminate symbol distinguishes I_PCM from I_16x16.
4877 if( get_cabac_terminate( &h->cabac ) )
4878 return 25; /* PCM */
4880 mb_type = 1; /* I16x16 */
4881 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4882 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4883 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// Last two bins select the 16x16 intra prediction mode.
4884 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4885 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
// CABAC mb_type for B slices: decodes the binarization tree of spec
// table 9-37 using contexts 27..31 (27+ctx for the first bin).
4889 static int decode_cabac_mb_type_b( H264Context *h ) {
4890 MpegEncContext * const s = &h->s;
4892 const int mba_xy = h->left_mb_xy[0];
4893 const int mbb_xy = h->top_mb_xy;
4896 assert(h->slice_type_nos == FF_B_TYPE);
// Context increment: count non-direct neighbours within this slice.
4898 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4900 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4903 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4904 return 0; /* B_Direct_16x16 */
4906 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4907 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four more bins form a 4-bit suffix selecting among the remaining types.
4910 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4911 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4912 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4913 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4915 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4916 else if( bits == 13 ) {
// bits==13 escapes into the intra mb_type tree (offset 23 in B slices).
4917 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4918 } else if( bits == 14 )
4919 return 11; /* B_L1_L0_8x16 */
4920 else if( bits == 15 )
4921 return 22; /* B_8x8 */
// Otherwise one more bin extends the code.
4923 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4924 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
// CABAC mb_skip_flag: context depends on whether the left/top neighbours
// (same slice) are themselves not skipped. In MBAFF mode the neighbour
// addresses must be adjusted for field/frame pairing.
4927 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4928 MpegEncContext * const s = &h->s;
4932 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4933 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// If the left pair's coding mode (field/frame) differs from ours,
// pick the matching MB of that pair.
4936 && h->slice_table[mba_xy] == h->slice_num
4937 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4938 mba_xy += s->mb_stride;
4940 mbb_xy = mb_xy - s->mb_stride;
4942 && h->slice_table[mbb_xy] == h->slice_num
4943 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4944 mbb_xy -= s->mb_stride;
4946 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Non-MBAFF: top neighbour is one row up (two rows in field pictures).
4948 int mb_xy = h->mb_xy;
4950 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4953 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4955 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices use a different context base than P slices (11+ctx here).
4958 if( h->slice_type_nos == FF_B_TYPE )
4960 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
// CABAC intra 4x4 prediction mode: a flag (ctx 68) selects the predicted
// mode; otherwise 3 bins (ctx 69) give rem_intra4x4_pred_mode.
4963 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4966 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// rem mode, least-significant bin first.
4969 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4970 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4971 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode, as in the CAVLC rem_mode mapping.
4973 if( mode >= pred_mode )
// CABAC intra chroma prediction mode: truncated unary, max value 3.
// First bin context counts neighbours with a nonzero chroma mode.
4979 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4980 const int mba_xy = h->left_mb_xy[0];
4981 const int mbb_xy = h->top_mb_xy;
4985 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4986 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4989 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4992 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Remaining bins share context 64+3.
4995 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4997 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
// CABAC luma CBP: four bins, one per 8x8 block. Each bin's context is
// derived from the coded state of the left and top neighbouring 8x8
// blocks (taken from this MB's bits already decoded, or from the
// neighbour MBs' cached cbp, -1 meaning "outside slice" so !(-1&x)==0).
5003 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5004 int cbp_b, cbp_a, ctx, cbp = 0;
5006 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5007 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5009 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5010 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5011 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5012 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5013 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5014 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5015 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5016 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
// CABAC chroma CBP: 0 = none, 1 = DC only, 2 = DC+AC. Contexts 77+ctx,
// derived from the neighbours' chroma cbp (bits 4-5 of their cbp).
5019 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5023 cbp_a = (h->left_cbp>>4)&0x03;
5024 cbp_b = (h-> top_cbp>>4)&0x03;
5027 if( cbp_a > 0 ) ctx++;
5028 if( cbp_b > 0 ) ctx += 2;
5029 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: contexts shifted by 4; neighbour "has AC" increments ctx.
5033 if( cbp_a == 2 ) ctx++;
5034 if( cbp_b == 2 ) ctx += 2;
5035 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
// CABAC mb_qp_delta: unary code with contexts 60..63; the unsigned value
// is mapped to a signed delta (even -> positive, odd -> negative).
5037 static int decode_cabac_mb_dqp( H264Context *h) {
// First-bin context: whether the previous MB had a nonzero qp delta.
5038 int ctx= h->last_qscale_diff != 0;
5041 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5044 if(val > 102) //prevent infinite loop
5049 return (val + 1)>>1 ;
5051 return -((val + 1)>>1);
// CABAC sub_mb_type for P slices: small binary tree over contexts 21..23,
// returning the sub_mb_type index (0 = P_L0_8x8, etc.).
5053 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5054 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5056 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5058 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
// CABAC sub_mb_type for B slices: binarization tree over contexts 36..39,
// returning the sub_mb_type index (0 = B_Direct_8x8, ... 11/12 = 4x4).
5062 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5064 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5065 return 0; /* B_Direct_8x8 */
5066 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5067 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5069 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5070 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5071 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two more bins with shared context 39.
5074 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5075 type += get_cabac( &h->cabac, &h->cabac_state[39] );
// CABAC transform_size_8x8_flag; context 399 + number of neighbouring
// MBs already using the 8x8 transform (precomputed in
// h->neighbor_transform_size).
5079 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5080 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
// CABAC ref_idx: unary code with contexts 54..; the first-bin context is
// derived from the left/top cached reference indices (direct-predicted
// neighbours in B slices count as reference 0).
5083 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5084 int refa = h->ref_cache[list][scan8[n] - 1];
5085 int refb = h->ref_cache[list][scan8[n] - 8];
5089 if( h->slice_type_nos == FF_B_TYPE) {
5090 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5092 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5101 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity bound so a corrupted stream cannot loop forever.
5104 if(ref >= 32 /*h->ref_list[list]*/){
// CABAC motion vector difference (one component, l = 0 horizontal /
// 1 vertical): UEG3 binarization — up to 9 context-coded bins, then a
// bypass-coded exp-Golomb suffix, then a bypass sign bit.
5111 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
// First-bin context from the sum of the neighbouring mvd magnitudes.
5112 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5113 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5114 int ctxbase = (l == 0) ? 40 : 47;
5116 int ctx = (amvd>2) + (amvd>32);
5118 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5123 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Escape: exp-Golomb suffix in bypass mode.
5131 while( get_cabac_bypass( &h->cabac ) ) {
5135 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5140 if( get_cabac_bypass( &h->cabac ) )
// Sign bit, folded into the magnitude.
5144 return get_cabac_bypass_sign( &h->cabac, -mvd );
// Computes the context index for the coded_block_flag of block 'idx' of
// category 'cat': nza/nzb are the left/top neighbours' coded state, taken
// from the cached cbp bits (DC / chroma-DC cases) or from the
// non_zero_count cache (AC / luma 4x4 cases).
5147 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma DC: bit 8 of the neighbour cbp caches tracks the DC flag.
5153 nza = h->left_cbp&0x100;
5154 nzb = h-> top_cbp&0x100;
// Chroma DC: bits 6+idx of the cbp caches.
5156 nza = (h->left_cbp>>(6+idx))&0x01;
5157 nzb = (h-> top_cbp>>(6+idx))&0x01;
5160 assert(cat == 1 || cat == 2 || cat == 4);
5161 nza = h->non_zero_count_cache[scan8[idx] - 1];
5162 nzb = h->non_zero_count_cache[scan8[idx] - 8];
// Each category owns 4 consecutive contexts.
5171 return ctx + 4 * cat;
// Context offset of the last_significant_coeff_flag for each of the first
// 63 scan positions of an 8x8 block (position 63 needs no flag).
// Declared via DECLARE_ASM_CONST, so presumably also referenced from
// inline assembly — confirm against the x86 CABAC code before moving it.
5174 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5175 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5176 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5177 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5178 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5181 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5182 static const int significant_coeff_flag_offset[2][6] = {
5183 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5184 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5186 static const int last_coeff_flag_offset[2][6] = {
5187 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5188 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5190 static const int coeff_abs_level_m1_offset[6] = {
5191 227+0, 227+10, 227+20, 227+30, 227+39, 426
5193 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5194 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5195 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5196 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5197 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5198 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5199 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5200 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5201 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5203 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5204 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5205 * map node ctx => cabac ctx for level=1 */
5206 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5207 /* map node ctx => cabac ctx for level>1 */
5208 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5209 static const uint8_t coeff_abs_level_transition[2][8] = {
5210 /* update node ctx after decoding a level=1 */
5211 { 1, 2, 3, 3, 4, 5, 6, 7 },
5212 /* update node ctx after decoding a level>1 */
5213 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Visible body of decode_cabac_residual_internal(): decodes one CABAC-coded
 * residual block into block[] (coefficient positions via scantable).
 * Steps: coded_block_flag, significance map + last-coefficient flags, then
 * coefficient levels/signs (bypass-coded suffixes for levels >= 15).
 * The is_dc paths store levels without dequantization (qmul unused there);
 * the non-DC paths scale by qmul[j] with rounding (+32 >> 6).
 * NOTE(review): several interleaved statements of this function are elided
 * in this chunk; comments describe only the visible lines. */
5219     int coeff_count = 0;
5222     uint8_t *significant_coeff_ctx_base;
5223     uint8_t *last_coeff_ctx_base;
5224     uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC state into a stack-local context (cc) so the hot loops work
 * on locals; the state is written back to h->cabac before each return. */
5227 #define CABAC_ON_STACK
5229 #ifdef CABAC_ON_STACK
5232     cc.range     = h->cabac.range;
5233     cc.low       = h->cabac.low;
5234     cc.bytestream= h->cabac.bytestream;
5236 #define CC &h->cabac
5240     /* cat: 0-> DC 16x16  n = 0
5241      * 1-> AC 16x16  n = luma4x4idx
5242      * 2-> Luma4x4   n = luma4x4idx
5243      * 3-> DC Chroma n = iCbCr
5244      * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5245      * 5-> Luma8x8   n = 4 * luma8x8idx
5248     /* read coded block flag */
5249     if( is_dc || cat != 5 ) {
5250         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* no coded coefficients in this block: clear the nnz cache and bail out */
5252                 h->non_zero_count_cache[scan8[n]] = 0;
5254 #ifdef CABAC_ON_STACK
5255             h->cabac.range     = cc.range     ;
5256             h->cabac.low       = cc.low       ;
5257             h->cabac.bytestream= cc.bytestream;
/* select the significance/last/level context families for this cat
 * (field vs frame coding selected by MB_FIELD) */
5263     significant_coeff_ctx_base = h->cabac_state
5264         + significant_coeff_flag_offset[MB_FIELD][cat];
5265     last_coeff_ctx_base = h->cabac_state
5266         + last_coeff_flag_offset[MB_FIELD][cat];
5267     abs_level_m1_ctx_base = h->cabac_state
5268         + coeff_abs_level_m1_offset[cat];
5270     if( !is_dc && cat == 5 ) {
5271 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5272         for(last= 0; last < coefs; last++) { \
5273             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5274             if( get_cabac( CC, sig_ctx )) { \
5275                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5276                 index[coeff_count++] = last; \
5277                 if( get_cabac( CC, last_ctx ) ) { \
5283         if( last == max_coeff -1 ) {\
5284             index[coeff_count++] = last;\
/* 8x8 blocks use per-position significance-context offsets; x86 asm
 * fast paths are used where available */
5286         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5287 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5288         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5290         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5292         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5294         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5297     assert(coeff_count > 0);
/* record the non-zero-coefficient info: cbp bits for DC blocks,
 * non_zero_count_cache entries otherwise */
5301             h->cbp_table[h->mb_xy] |= 0x100;
5303             h->cbp_table[h->mb_xy] |= 0x40 << n;
5306             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5308             assert( cat == 1 || cat == 2 || cat == 4 );
5309             h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode coefficient levels; index[] is consumed back-to-front, so the
 * highest-frequency coefficient is decoded first */
5314         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5316         int j= scantable[index[--coeff_count]];
5318         if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: store the signed (and, for non-DC, dequantized) value */
5319             node_ctx = coeff_abs_level_transition[0][node_ctx];
5321                 block[j] = get_cabac_bypass_sign( CC, -1);
5323                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5327             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5328             node_ctx = coeff_abs_level_transition[1][node_ctx];
/* unary prefix up to 14, then an exp-Golomb-style bypass-coded suffix */
5330             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5334             if( coeff_abs >= 15 ) {
5336                 while( get_cabac_bypass( CC ) ) {
5342                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5348                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5350                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5353     } while( coeff_count );
/* write the local CABAC state back before returning */
5354 #ifdef CABAC_ON_STACK
5355             h->cabac.range     = cc.range     ;
5356             h->cabac.low       = cc.low       ;
5357             h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal(). When CONFIG_SMALL is
 * not set, the DC and non-DC paths are split into two functions so that
 * is_dc is a compile-time constant in each (letting the compiler drop the
 * untaken branches); otherwise a single call passes is_dc dynamically.
 * cat 0 (luma DC) and cat 3 (chroma DC) are the DC categories. */
5362 #ifndef CONFIG_SMALL
5363 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5364     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5367 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5368     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5372 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5374     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5376     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5377     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Defaults are the plain raster neighbours; in MBAFF the neighbour indices
 * are corrected for the field/frame coding of the current and neighbouring
 * MB pairs, and in field pictures the top neighbour is one extra row up. */
5381 static inline void compute_mb_neighbors(H264Context *h)
5383     MpegEncContext * const s = &h->s;
5384     const int mb_xy = h->mb_xy;
5385     h->top_mb_xy = mb_xy - s->mb_stride;
5386     h->left_mb_xy[0] = mb_xy - 1;
5388         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5389         const int top_pair_xy      = pair_xy     - s->mb_stride;
5390         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5391         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5392         const int curr_mb_field_flag = MB_FIELD;
5393         const int bottom = (s->mb_y & 1);
5395         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5396             h->top_mb_xy -= s->mb_stride;
5398         if (!left_mb_field_flag == curr_mb_field_flag) {
5399             h->left_mb_xy[0] = pair_xy - 1;
5401     } else if (FIELD_PICTURE) {
5402         h->top_mb_xy -= s->mb_stride;
5408  * decodes a macroblock
5409  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5411 static int decode_mb_cabac(H264Context *h) {
5412     MpegEncContext * const s = &h->s;
5414     int mb_type, partition_count, cbp = 0;
5415     int dct8x8_allowed= h->pps.transform_8x8_mode;
5417     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5419     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    /* --- skip flags (P/B slices only) --- */
5420     if( h->slice_type_nos != FF_I_TYPE ) {
5422         /* a skipped mb needs the aff flag from the following mb */
5423         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5424             predict_field_decoding_flag(h);
5425         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5426             skip = h->next_mb_skipped;
5428             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5429         /* read skip flags */
5431             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5432                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5433                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5434                 if(!h->next_mb_skipped)
5435                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5440             h->cbp_table[mb_xy] = 0;
5441             h->chroma_pred_mode_table[mb_xy] = 0;
5442             h->last_qscale_diff = 0;
5449         if( (s->mb_y&1) == 0 )
5451                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5454     h->prev_mb_skipped = 0;
5456     compute_mb_neighbors(h);
    /* --- mb_type decoding, per slice type --- */
5458     if( h->slice_type_nos == FF_B_TYPE ) {
5459         mb_type = decode_cabac_mb_type_b( h );
5461             partition_count= b_mb_type_info[mb_type].partition_count;
5462             mb_type= b_mb_type_info[mb_type].type;
5465             goto decode_intra_mb;
5467     } else if( h->slice_type_nos == FF_P_TYPE ) {
5468         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5470             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5471                 /* P_L0_D16x16, P_8x8 */
5472                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5474                 /* P_L0_D8x16, P_L0_D16x8 */
5475                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5477             partition_count= p_mb_type_info[mb_type].partition_count;
5478             mb_type= p_mb_type_info[mb_type].type;
5480             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5481             goto decode_intra_mb;
5484         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5485         if(h->slice_type == FF_SI_TYPE && mb_type)
5487         assert(h->slice_type_nos == FF_I_TYPE);
5489         partition_count = 0;
5490         cbp= i_mb_type_info[mb_type].cbp;
5491         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5492         mb_type= i_mb_type_info[mb_type].type;
5495         mb_type |= MB_TYPE_INTERLACED;
5497     h->slice_table[ mb_xy ]= h->slice_num;
    /* --- I_PCM: raw samples follow; restart the CABAC decoder after them --- */
5499     if(IS_INTRA_PCM(mb_type)) {
5502         // We assume these blocks are very rare so we do not optimize it.
5503         // FIXME The two following lines get the bitstream position in the cabac
5504         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5505         ptr= h->cabac.bytestream;
5506         if(h->cabac.low&0x1) ptr--;
5508             if(h->cabac.low&0x1FF) ptr--;
5511         // The pixels are stored in the same order as levels in h->mb array.
5512         memcpy(h->mb, ptr, 256); ptr+=256;
5514             memcpy(h->mb+128, ptr, 128); ptr+=128;
5517         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5519         // All blocks are present
5520         h->cbp_table[mb_xy] = 0x1ef;
5521         h->chroma_pred_mode_table[mb_xy] = 0;
5522         // In deblocking, the quantizer is 0
5523         s->current_picture.qscale_table[mb_xy]= 0;
5524         // All coeffs are present
5525         memset(h->non_zero_count[mb_xy], 16, 16);
5526         s->current_picture.mb_type[mb_xy]= mb_type;
5527         h->last_qscale_diff = 0;
    /* MBAFF: ref counts are doubled while decoding a field MB pair
     * (undone near the end of this function) */
5532         h->ref_count[0] <<= 1;
5533         h->ref_count[1] <<= 1;
5536     fill_caches(h, mb_type, 0);
    /* --- prediction-mode decoding --- */
5538     if( IS_INTRA( mb_type ) ) {
5540         if( IS_INTRA4x4( mb_type ) ) {
5541             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5542                 mb_type |= MB_TYPE_8x8DCT;
5543                 for( i = 0; i < 16; i+=4 ) {
5544                     int pred = pred_intra_mode( h, i );
5545                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5546                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5549                 for( i = 0; i < 16; i++ ) {
5550                     int pred = pred_intra_mode( h, i );
5551                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5553                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5556             write_back_intra_pred_mode(h);
5557             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5559             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5560             if( h->intra16x16_pred_mode < 0 ) return -1;
5563             h->chroma_pred_mode_table[mb_xy] =
5564             pred_mode                       = decode_cabac_mb_chroma_pre_mode( h );
5566         pred_mode= check_intra_pred_mode( h, pred_mode );
5567         if( pred_mode < 0 ) return -1;
5568         h->chroma_pred_mode= pred_mode;
    /* --- inter, 8x8 partitions (sub-macroblock types) --- */
5570     } else if( partition_count == 4 ) {
5571         int i, j, sub_partition_count[4], list, ref[2][4];
5573         if( h->slice_type_nos == FF_B_TYPE ) {
5574             for( i = 0; i < 4; i++ ) {
5575                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5576                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5577                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5579             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5580                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5581                 pred_direct_motion(h, &mb_type);
5582                 h->ref_cache[0][scan8[4]] =
5583                 h->ref_cache[1][scan8[4]] =
5584                 h->ref_cache[0][scan8[12]] =
5585                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5586                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5587                     for( i = 0; i < 4; i++ )
5588                         if( IS_DIRECT(h->sub_mb_type[i]) )
5589                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5593             for( i = 0; i < 4; i++ ) {
5594                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5595                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5596                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
        /* reference indices per list, per 8x8 block (range-checked) */
5600         for( list = 0; list < h->list_count; list++ ) {
5601                 for( i = 0; i < 4; i++ ) {
5602                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5603                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5604                         if( h->ref_count[list] > 1 ){
5605                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5606                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5607                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5615                     h->ref_cache[list][ scan8[4*i]+1 ]=
5616                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5621             dct8x8_allowed = get_dct8x8_allowed(h);
        /* motion-vector deltas per sub-partition; mv/mvd caches are filled
         * in the shape corresponding to the sub-partition geometry */
5623         for(list=0; list<h->list_count; list++){
5625                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5626                 if(IS_DIRECT(h->sub_mb_type[i])){
5627                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5631                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5632                     const int sub_mb_type= h->sub_mb_type[i];
5633                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5634                     for(j=0; j<sub_partition_count[i]; j++){
5637                         const int index= 4*i + block_width*j;
5638                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5639                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5640                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5642                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5643                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5644                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5646                         if(IS_SUB_8X8(sub_mb_type)){
5648                                 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5650                                 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5653                                 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5655                                 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5656                         }else if(IS_SUB_8X4(sub_mb_type)){
5657                             mv_cache[ 1 ][0]= mx;
5658                             mv_cache[ 1 ][1]= my;
5660                             mvd_cache[ 1 ][0]= mx - mpx;
5661                             mvd_cache[ 1 ][1]= my - mpy;
5662                         }else if(IS_SUB_4X8(sub_mb_type)){
5663                             mv_cache[ 8 ][0]= mx;
5664                             mv_cache[ 8 ][1]= my;
5666                             mvd_cache[ 8 ][0]= mx - mpx;
5667                             mvd_cache[ 8 ][1]= my - mpy;
5669                         mv_cache[ 0 ][0]= mx;
5670                         mv_cache[ 0 ][1]= my;
5672                         mvd_cache[ 0 ][0]= mx - mpx;
5673                         mvd_cache[ 0 ][1]= my - mpy;
5676                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5677                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5678                     p[0] = p[1] = p[8] = p[9] = 0;
5679                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
    /* --- direct mode: all motion predicted, mvd caches cleared --- */
5683     } else if( IS_DIRECT(mb_type) ) {
5684         pred_direct_motion(h, &mb_type);
5685         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5686         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5687         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    /* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
5689         int list, mx, my, i, mpx, mpy;
5690         if(IS_16X16(mb_type)){
5691             for(list=0; list<h->list_count; list++){
5692                 if(IS_DIR(mb_type, 0, list)){
5694                     if(h->ref_count[list] > 1){
5695                         ref= decode_cabac_mb_ref(h, list, 0);
5696                         if(ref >= (unsigned)h->ref_count[list]){
5697                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5702                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5704                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5706             for(list=0; list<h->list_count; list++){
5707                 if(IS_DIR(mb_type, 0, list)){
5708                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5710                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5711                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5712                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5714                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5715                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5717                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5720         else if(IS_16X8(mb_type)){
5721             for(list=0; list<h->list_count; list++){
5723                     if(IS_DIR(mb_type, i, list)){
5725                         if(h->ref_count[list] > 1){
5726                             ref= decode_cabac_mb_ref( h, list, 8*i );
5727                             if(ref >= (unsigned)h->ref_count[list]){
5728                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5733                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5735                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5738             for(list=0; list<h->list_count; list++){
5740                     if(IS_DIR(mb_type, i, list)){
5741                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5742                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5743                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5744                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5746                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5747                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5749                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5750                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5755             assert(IS_8X16(mb_type));
5756             for(list=0; list<h->list_count; list++){
5758                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5760                         if(h->ref_count[list] > 1){
5761                             ref= decode_cabac_mb_ref( h, list, 4*i );
5762                             if(ref >= (unsigned)h->ref_count[list]){
5763                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5768                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5770                         fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5773             for(list=0; list<h->list_count; list++){
5775                     if(IS_DIR(mb_type, i, list)){
5776                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5777                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5778                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5780                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5781                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5782                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5784                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5785                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5792     if( IS_INTER( mb_type ) ) {
5793         h->chroma_pred_mode_table[mb_xy] = 0;
5794         write_back_motion( h, mb_type );
    /* --- coded block pattern and residuals --- */
5797     if( !IS_INTRA16x16( mb_type ) ) {
5798         cbp  = decode_cabac_mb_cbp_luma( h );
5800             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5803     h->cbp_table[mb_xy] = h->cbp = cbp;
5805     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5806         if( decode_cabac_mb_transform_size( h ) )
5807             mb_type |= MB_TYPE_8x8DCT;
5809     s->current_picture.mb_type[mb_xy]= mb_type;
5811     if( cbp || IS_INTRA16x16( mb_type ) ) {
5812         const uint8_t *scan, *scan8x8, *dc_scan;
5813         const uint32_t *qmul;
        /* pick field or frame scan orders (qscale==0 uses the _q0 variants) */
5816         if(IS_INTERLACED(mb_type)){
5817             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5818             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5819             dc_scan= luma_dc_field_scan;
5821             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5822             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5823             dc_scan= luma_dc_zigzag_scan;
5826         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5827         if( dqp == INT_MIN ){
5828             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
        /* wrap qscale into [0,51] as required after adding the delta */
5832         if(((unsigned)s->qscale) > 51){
5833             if(s->qscale<0) s->qscale+= 52;
5834             else            s->qscale-= 52;
5836         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5837         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5839         if( IS_INTRA16x16( mb_type ) ) {
5841             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5842             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5845                 qmul = h->dequant4_coeff[0][s->qscale];
5846                 for( i = 0; i < 16; i++ ) {
5847                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5848                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5851                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5855             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5856                 if( cbp & (1<<i8x8) ) {
5857                     if( IS_8x8DCT(mb_type) ) {
5858                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5859                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5861                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5862                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5863                             const int index = 4*i8x8 + i4x4;
5864                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5866                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5867                             //STOP_TIMER("decode_residual")
5871                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5872                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5879             for( c = 0; c < 2; c++ ) {
5880                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5881                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5887             for( c = 0; c < 2; c++ ) {
5888                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5889                 for( i = 0; i < 4; i++ ) {
5890                     const int index = 16 + 4 * c + i;
5891                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5892                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5896             uint8_t * const nnz= &h->non_zero_count_cache[0];
5897             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5898             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5901         uint8_t * const nnz= &h->non_zero_count_cache[0];
5902         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5903         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5904         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5905         h->last_qscale_diff = 0;
5908     s->current_picture.qscale_table[mb_xy]= s->qscale;
5909     write_back_non_zero_count(h);
    /* undo the MBAFF ref-count doubling from above */
5912         h->ref_count[0] >>= 1;
5913         h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge. For the normal filter the per-line
 * clipping values tc[] are looked up from tc0_table by (qp+alpha offset,
 * bS); otherwise the intra (strong) filter variant is invoked.
 * All per-pixel work is delegated to the dsputil function pointers. */
5920 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5921     const int index_a = qp + h->slice_alpha_c0_offset;
5922     const int alpha = (alpha_table+52)[index_a];
5923     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5927         tc[0] = (tc0_table+52)[index_a][bS[0]];
5928         tc[1] = (tc0_table+52)[index_a][bS[1]];
5929         tc[2] = (tc0_table+52)[index_a][bS[2]];
5930         tc[3] = (tc0_table+52)[index_a][bS[3]];
5931         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5933         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge. Same structure as filter_mb_edgev,
 * but chroma uses tc0+1 as the clipping value, per the chroma filter rule. */
5936 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5937     const int index_a = qp + h->slice_alpha_c0_offset;
5938     const int alpha = (alpha_table+52)[index_a];
5939     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5943         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5944         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5945         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5946         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5947         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5949         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF left-edge luma deblocking, done scalar pixel-row by pixel-row:
 * qp, index_a/alpha/beta and the bS index vary along the edge (two qp
 * values passed in, selected per row via qp_index), so the vectorized
 * dsputil edge filters are not applicable here.
 * bS < 4 uses the normal filter with tc0 clipping; bS == 4 the strong
 * (intra) filter with the alpha/4+2 proximity test. */
5953 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5955     for( i = 0; i < 16; i++, pix += stride) {
5961         int bS_index = (i >> 1);
5964             bS_index |= (i & 1);
5967         if( bS[bS_index] == 0 ) {
        /* per-row quantizer selection: field MBs alternate by half, frame MBs by parity */
5971         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5972         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
5973         alpha = (alpha_table+52)[index_a];
5974         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5976         if( bS[bS_index] < 4 ) {
5977             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5978             const int p0 = pix[-1];
5979             const int p1 = pix[-2];
5980             const int p2 = pix[-3];
5981             const int q0 = pix[0];
5982             const int q1 = pix[1];
5983             const int q2 = pix[2];
5985             if( FFABS( p0 - q0 ) < alpha &&
5986                 FFABS( p1 - p0 ) < beta &&
5987                 FFABS( q1 - q0 ) < beta ) {
                /* optionally filter p1/q1 too, extending the clip range */
5991                 if( FFABS( p2 - p0 ) < beta ) {
5992                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5995                 if( FFABS( q2 - q0 ) < beta ) {
5996                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6000                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6001                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6002                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6003                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6006             const int p0 = pix[-1];
6007             const int p1 = pix[-2];
6008             const int p2 = pix[-3];
6010             const int q0 = pix[0];
6011             const int q1 = pix[1];
6012             const int q2 = pix[2];
6014             if( FFABS( p0 - q0 ) < alpha &&
6015                 FFABS( p1 - p0 ) < beta &&
6016                 FFABS( q1 - q0 ) < beta ) {
                /* strong filter: 3-tap default, 4/5-tap when the edge is "flat enough" */
6018                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6019                     if( FFABS( p2 - p0 ) < beta)
6021                         const int p3 = pix[-4];
6023                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6024                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6025                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6028                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6030                     if( FFABS( q2 - q0 ) < beta)
6032                         const int q3 = pix[3];
6034                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6035                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6036                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6039                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6043                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6044                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6046                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF left-edge chroma deblocking, scalar per pixel-row (qp and bS vary
 * along the edge, see filter_mb_mbaff_edgev). Chroma only filters p0/q0:
 * normal filter clips with tc0+1, strong filter uses the 2-tap average. */
6051 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6053     for( i = 0; i < 8; i++, pix += stride) {
6061         if( bS[bS_index] == 0 ) {
6065         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6066         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6067         alpha = (alpha_table+52)[index_a];
6068         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6070         if( bS[bS_index] < 4 ) {
6071             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6072             const int p0 = pix[-1];
6073             const int p1 = pix[-2];
6074             const int q0 = pix[0];
6075             const int q1 = pix[1];
6077             if( FFABS( p0 - q0 ) < alpha &&
6078                 FFABS( p1 - p0 ) < beta &&
6079                 FFABS( q1 - q0 ) < beta ) {
6080                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6082                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6083                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6084                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6087             const int p0 = pix[-1];
6088             const int p1 = pix[-2];
6089             const int q0 = pix[0];
6090             const int q1 = pix[1];
6092             if( FFABS( p0 - q0 ) < alpha &&
6093                 FFABS( p1 - p0 ) < beta &&
6094                 FFABS( q1 - q0 ) < beta ) {
6096                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6097                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6098                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev using the
 * vertical-direction dsputil filters. */
6104 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6105     const int index_a = qp + h->slice_alpha_c0_offset;
6106     const int alpha = (alpha_table+52)[index_a];
6107     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6111         tc[0] = (tc0_table+52)[index_a][bS[0]];
6112         tc[1] = (tc0_table+52)[index_a][bS[1]];
6113         tc[2] = (tc0_table+52)[index_a][bS[2]];
6114         tc[3] = (tc0_table+52)[index_a][bS[3]];
6115         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6117         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv
 * (chroma clipping uses tc0+1). */
6121 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6122     const int index_a = qp + h->slice_alpha_c0_offset;
6123     const int alpha = (alpha_table+52)[index_a];
6124     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6128         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6129         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6130         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6131         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6132         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6134         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock. Handles only the common cases;
 * anything else (first row/column, per-MB chroma qp diff, missing
 * loop_filter_strength dsp function, slice-boundary deblocking mode 2,
 * or when CODEC_FLAG2_FAST is unset) falls back to the full filter_mb(). */
6138 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6139     MpegEncContext * const s = &h->s;
6140     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6142     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6146     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6147        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6148        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6149                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6150         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6153     assert(!FRAME_MBAFF);
    /* averaged quantizers across the left/top edges, per the standard */
6155     mb_type = s->current_picture.mb_type[mb_xy];
6156     qp = s->current_picture.qscale_table[mb_xy];
6157     qp0 = s->current_picture.qscale_table[mb_xy-1];
6158     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6159     qpc = get_chroma_qp( h, 0, qp );
6160     qpc0 = get_chroma_qp( h, 0, qp0 );
6161     qpc1 = get_chroma_qp( h, 0, qp1 );
6162     qp0 = (qp + qp0 + 1) >> 1;
6163     qp1 = (qp + qp1 + 1) >> 1;
6164     qpc0 = (qpc + qpc0 + 1) >> 1;
6165     qpc1 = (qpc + qpc1 + 1) >> 1;
6166     qp_thresh = 15 - h->slice_alpha_c0_offset;
    /* all quantizers below threshold => filtering would be a no-op, skip */
6167     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6168        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6171     if( IS_INTRA(mb_type) ) {
        /* intra MB: fixed strengths (4 at MB edges, 3 inside;
         * horizontal MB edge uses 3 in field pictures) */
6172         int16_t bS4[4] = {4,4,4,4};
6173         int16_t bS3[4] = {3,3,3,3};
6174         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6175         if( IS_8x8DCT(mb_type) ) {
6176             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6177             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6178             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6179             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6181             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6182             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6183             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6184             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6185             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6186             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6187             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6188             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6190         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6191         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6192         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6193         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6194         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6195         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6196         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6197         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
        /* inter MB: compute bS per edge from nnz/ref/mv via dsputil,
         * then force 4 (or 3 for field) on edges bordering intra MBs */
6200         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6201         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6203         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6205             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6207             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6208                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6209             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6210                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6212             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6213             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6214             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6215                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6217         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6218             bSv[0][0] = 0x0004000400040004ULL;
6219         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6220             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6222 #define FILTER(hv,dir,edge)\
6223         if(bSv[dir][edge]) {\
6224             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6226                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6227                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6233         } else if( IS_8x8DCT(mb_type) ) {
/* Apply the in-loop deblocking filter to one macroblock in one direction
 * (dir==0: vertical edges, dir==1: horizontal edges).  Computes the boundary
 * strength bS for each 4-sample edge segment per H.264 spec 8.7, then calls the
 * per-edge filter helpers.  NOTE(review): this extract is missing interior
 * lines; comments below describe only what is visible here. */
6253 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6254 MpegEncContext * const s = &h->s;
/* mbm_xy/mbm_type: the neighbouring macroblock across edge 0 (left for
 * vertical filtering, top for horizontal filtering). */
6256 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6257 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers so references can be
 * compared across slices; +20/+2 biases the table so negative ref indices
 * (unused/PCM) index valid entries. */
6258 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6259 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF slice id means the neighbour is outside the picture: skip edge 0. */
6260 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB has no internal edges to filter, only edge 0. */
6262 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6263 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6264 // how often to recheck mv-based bS when iterating between edges
6265 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6266 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6267 // how often to recheck mv-based bS when iterating along each edge
6268 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* Edge 0 may already have been filtered by the MBAFF special case in
 * filter_mb(); don't do it twice. */
6270 if (first_vertical_edge_done) {
/* deblocking_filter==2 means "don't filter across slice boundaries". */
6274 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
/* Frame MB above a field MB pair: the top edge must be filtered twice,
 * once per field, with doubled line strides. */
6277 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6278 && !IS_INTERLACED(mb_type)
6279 && IS_INTERLACED(mbm_type)
6281 // This is a special case in the norm where the filtering must
6282 // be done twice (one each of the field) even if we are in a
6283 // frame macroblock.
/* nnz_idx selects the bottom-row non_zero_count entries of the MB above. */
6285 static const int nnz_idx[4] = {4,5,6,3};
6286 unsigned int tmp_linesize = 2 * linesize;
6287 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6288 int mbn_xy = mb_xy - 2 * s->mb_stride;
6293 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6294 if( IS_INTRA(mb_type) ||
6295 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6296 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6298 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6299 for( i = 0; i < 4; i++ ) {
6300 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6301 mbn_nnz[nnz_idx[i]] != 0 )
6307 // Do not use s->qscale as luma quantizer because it has not the same
6308 // value in IPCM macroblocks.
6309 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6310 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6311 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6312 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6313 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6314 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6315 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6316 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main per-edge loop; edge 0 borders the neighbour MB, edges 1..3 are
 * internal to the current MB. */
6323 for( edge = start; edge < edges; edge++ ) {
6324 /* mbn_xy: neighbor macroblock */
6325 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6326 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6327 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 DCT: odd internal edges don't exist at transform granularity. */
6331 if( (edge&1) && IS_8x8DCT(mb_type) )
/* Intra on either side forces the strongest boundary strength. */
6334 if( IS_INTRA(mb_type) ||
6335 IS_INTRA(mbn_type) ) {
6338 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6339 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6348 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter MBs: bS can often be derived once for the whole edge. */
6353 if( edge & mask_edge ) {
6354 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Mixed frame/field neighbours in MBAFF always get bS=1. */
6357 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6358 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Fast path: both sides use a single partition across this edge, so one
 * mv/ref comparison covers all four segments. */
6361 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6362 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6363 int bn_idx= b_idx - (dir ? 8:1);
6366 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6367 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6368 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6369 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare the cross-list pairing (L0 vs L1). */
6372 if(h->slice_type_nos == FF_B_TYPE && v){
6374 for( l = 0; !v && l < 2; l++ ) {
6376 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6377 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6378 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6382 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Slow path: derive bS per 4-sample segment. */
6388 for( i = 0; i < 4; i++ ) {
6389 int x = dir == 0 ? edge : i;
6390 int y = dir == 0 ? i : edge;
6391 int b_idx= 8 + 4 + x + 8*y;
6392 int bn_idx= b_idx - (dir ? 8:1);
/* Non-zero coefficients on either side => bS=2 (set in elided lines). */
6394 if( h->non_zero_count_cache[b_idx] |
6395 h->non_zero_count_cache[bn_idx] ) {
6401 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6402 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6403 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6404 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6410 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6412 for( l = 0; l < 2; l++ ) {
6414 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6415 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6416 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All-zero strengths: nothing to filter on this edge. */
6425 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6430 // Do not use s->qscale as luma quantizer because it has not the same
6431 // value in IPCM macroblocks.
6432 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6433 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6434 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6435 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical edges: luma every edge, chroma only on even edges (4:2:0). */
6437 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6438 if( (edge&1) == 0 ) {
6439 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6440 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6441 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6442 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal edges: same pattern with row offsets. */
6445 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6446 if( (edge&1) == 0 ) {
6447 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6448 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6449 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6450 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Deblock one macroblock: handles the low-QP early-out, the CAVLC 8x8-DCT
 * non_zero_count fixups, the MBAFF mixed frame/field left-edge special case,
 * then delegates to filter_mb_dir() for vertical and horizontal edges.
 * NOTE(review): interior lines are elided in this extract. */
6456 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6457 MpegEncContext * const s = &h->s;
6458 const int mb_xy= mb_x + mb_y*s->mb_stride;
6459 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs use a tighter vertical-mv threshold (half-pel units per field). */
6460 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6461 int first_vertical_edge_done = 0;
6464 //for sufficiently low qp, filtering wouldn't do anything
6465 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6467 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6468 int qp = s->current_picture.qscale_table[mb_xy];
/* Skip the whole MB when current and both neighbour averaged QPs are
 * below the filtering threshold. */
6470 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6471 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6476 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6477 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6478 int top_type, left_type[2];
6479 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6480 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6481 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the relevant non_zero_count_cache entries from cbp bits so the
 * loop filter sees per-8x8-block coded-ness. */
6483 if(IS_8x8DCT(top_type)){
6484 h->non_zero_count_cache[4+8*0]=
6485 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6486 h->non_zero_count_cache[6+8*0]=
6487 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6489 if(IS_8x8DCT(left_type[0])){
6490 h->non_zero_count_cache[3+8*1]=
6491 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6493 if(IS_8x8DCT(left_type[1])){
6494 h->non_zero_count_cache[3+8*3]=
6495 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6498 if(IS_8x8DCT(mb_type)){
6499 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6500 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6502 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6503 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6505 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6506 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6508 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6509 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6514 // left mb is in picture
6515 && h->slice_table[mb_xy-1] != 0xFFFF
6516 // and current and left pair do not have the same interlaced type
6517 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6518 // and left mb is in the same slice if deblocking_filter == 2
6519 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6520 /* First vertical edge is different in MBAFF frames
6521 * There are 8 different bS to compute and 2 different Qp
6523 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6524 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
/* qp: luma, bqp: Cb, rqp: Cr — one value per left-neighbour MB of the pair. */
6529 int mb_qp, mbn0_qp, mbn1_qp;
6531 first_vertical_edge_done = 1;
6533 if( IS_INTRA(mb_type) )
6534 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6536 for( i = 0; i < 8; i++ ) {
6537 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6539 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6541 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6542 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6543 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6545 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average the QP of the current MB with each left neighbour, per spec. */
6552 mb_qp = s->current_picture.qscale_table[mb_xy];
6553 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6554 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6555 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6556 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6557 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6558 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6559 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6560 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6561 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6562 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6563 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6564 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6567 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6568 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6569 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6570 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6571 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Regular path: filter both directions via the shared helper. */
6575 for( dir = 0; dir < 2; dir++ )
6576 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6578 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6579 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode all macroblocks of one slice (thread entry point; arg is a
 * H264Context**).  Three paths are visible: CABAC, CAVLC, and a per-MB
 * fallback loop.  Fix in this revision: line 6745 contained mis-encoded
 * bytes ("s->?gb" / "s->gb?.") that broke the expression; restored to the
 * spelling used by the adjacent line 6746.
 * NOTE(review): interior lines are elided in this extract. */
6583 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6584 H264Context *h = *(void**)arg;
6585 MpegEncContext * const s = &h->s;
/* Partitioned frames only record AC errors/ends in the error resilience. */
6586 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6590 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6591 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
/* --- CABAC path --- */
6593 if( h->pps.cabac ) {
6597 align_get_bits( &s->gb );
6600 ff_init_cabac_states( &h->cabac);
6601 ff_init_cabac_decoder( &h->cabac,
6602 s->gb.buffer + get_bits_count(&s->gb)/8,
6603 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6604 /* calculate pre-state */
6605 for( i= 0; i < 460; i++ ) {
6607 if( h->slice_type_nos == FF_I_TYPE )
6608 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6610 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6613 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6615 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6620 int ret = decode_mb_cabac(h);
6622 //STOP_TIMER("decode_mb_cabac")
6624 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well. */
6626 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6629 ret = decode_mb_cabac(h);
6631 if(ret>=0) hl_decode_mb(h);
6634 eos = get_cabac_terminate( &h->cabac );
/* Overread of more than 2 bytes is treated as a bitstream error. */
6636 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6637 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6638 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6642 if( ++s->mb_x >= s->mb_width ) {
6644 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6646 if(FIELD_OR_MBAFF_PICTURE) {
6651 if( eos || s->mb_y >= s->mb_height ) {
6652 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6653 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* --- CAVLC path --- */
6660 int ret = decode_mb_cavlc(h);
6662 if(ret>=0) hl_decode_mb(h);
6664 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6666 ret = decode_mb_cavlc(h);
6668 if(ret>=0) hl_decode_mb(h);
6673 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6674 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6679 if(++s->mb_x >= s->mb_width){
6681 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6683 if(FIELD_OR_MBAFF_PICTURE) {
6686 if(s->mb_y >= s->mb_height){
6687 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Landing exactly on the bitstream end means a clean slice end. */
6689 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6690 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6694 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6701 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6702 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6703 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6704 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6708 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* --- fallback per-MB loop --- */
6717 for(;s->mb_y < s->mb_height; s->mb_y++){
6718 for(;s->mb_x < s->mb_width; s->mb_x++){
6719 int ret= decode_mb(h);
6724 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6730 if(++s->mb_x >= s->mb_width){
6732 if(++s->mb_y >= s->mb_height){
6733 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6734 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6738 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* fixed: removed stray '?' bytes that corrupted this condition */
6745 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6746 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6747 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6751 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6758 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6761 return -1; //not reached
/* Parse a Picture Timing SEI message (H.264 spec D.1.2/D.2.2).  Stores
 * pic_struct in h->sei_pic_struct; the clock-timestamp fields are parsed
 * but discarded.  NOTE(review): interior lines elided in this extract. */
6764 static int decode_picture_timing(H264Context *h){
6765 MpegEncContext * const s = &h->s;
/* cpb/dpb delay fields are only present when HRD parameters were signalled
 * in the active SPS; their lengths also come from the SPS. */
6766 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6767 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6768 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6770 if(h->sps.pic_struct_present_flag){
6771 unsigned int i, num_clock_ts;
6772 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Values above frame-tripling are reserved. */
6774 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6777 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6779 for (i = 0 ; i < num_clock_ts ; i++){
6780 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6781 unsigned int full_timestamp_flag;
6782 skip_bits(&s->gb, 2); /* ct_type */
6783 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6784 skip_bits(&s->gb, 5); /* counting_type */
6785 full_timestamp_flag = get_bits(&s->gb, 1);
6786 skip_bits(&s->gb, 1); /* discontinuity_flag */
6787 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6788 skip_bits(&s->gb, 8); /* n_frames */
/* Either a full h/m/s timestamp, or optional nested components. */
6789 if(full_timestamp_flag){
6790 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6791 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6792 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6794 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6795 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6796 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6797 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6798 if(get_bits(&s->gb, 1)) /* hours_flag */
6799 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6803 if(h->sps.time_offset_length > 0)
6804 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an "unregistered user data" SEI payload of `size` bytes.  Copies up
 * to sizeof(user_data)-1 bytes, scans for an x264 version banner to record
 * h->x264_build (used elsewhere for bug workarounds), and skips the rest.
 * NOTE(review): interior lines elided in this extract. */
6811 static int decode_unregistered_user_data(H264Context *h, int size){
6812 MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text. */
6813 uint8_t user_data[16+256];
6819 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6820 user_data[i]= get_bits(&s->gb, 8);
/* The text after the 16-byte UUID is matched against the x264 banner. */
6824 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6825 if(e==1 && build>=0)
6826 h->x264_build= build;
6828 if(s->avctx->debug & FF_DEBUG_BUGS)
6829 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Consume any payload bytes beyond what fit in the buffer. */
6832 skip_bits(&s->gb, 8);
/* Parse all SEI messages in the current NAL unit.  Each message is a
 * (type, size) pair, both coded as sequences of 0xFF bytes plus a final
 * byte (spec 7.3.2.3.1).  NOTE(review): interior lines elided here. */
6837 static int decode_sei(H264Context *h){
6838 MpegEncContext * const s = &h->s;
/* +16: stop while at least a minimal (type,size) header could remain. */
6840 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* Accumulate payload type: each 0xFF byte adds 255. */
6845 type+= show_bits(&s->gb, 8);
6846 }while(get_bits(&s->gb, 8) == 255);
/* Accumulate payload size the same way. */
6850 size+= show_bits(&s->gb, 8);
6851 }while(get_bits(&s->gb, 8) == 255);
6854 case 1: // Picture timing SEI
6855 if(decode_picture_timing(h) < 0)
6859 if(decode_unregistered_user_data(h, size) < 0)
/* Unknown payload types are skipped wholesale. */
6863 skip_bits(&s->gb, 8*size);
6866 //FIXME check bits here
6867 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters (spec E.1.2) into
 * the SPS.  Most per-CPB fields are read and discarded; only the delay
 * field lengths and time_offset_length are kept for later SEI parsing.
 * NOTE(review): interior lines elided in this extract. */
6873 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6874 MpegEncContext * const s = &h->s;
6876 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* cpb_cnt_minus1 must be in 0..31; reject corrupt streams. */
6878 if(cpb_count > 32U){
6879 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6883 get_bits(&s->gb, 4); /* bit_rate_scale */
6884 get_bits(&s->gb, 4); /* cpb_size_scale */
6885 for(i=0; i<cpb_count; i++){
6886 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6887 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6888 get_bits1(&s->gb); /* cbr_flag */
6890 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
/* These lengths size the delay fields in picture-timing SEIs. */
6891 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6892 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6893 sps->time_offset_length = get_bits(&s->gb, 5);
/* Parse VUI (video usability information) from the SPS (spec E.1.1):
 * aspect ratio, video signal description, timing, HRD, and bitstream
 * restrictions.  Fields the decoder does not use are read and discarded.
 * NOTE(review): interior lines elided in this extract. */
6897 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6898 MpegEncContext * const s = &h->s;
6899 int aspect_ratio_info_present_flag;
6900 unsigned int aspect_ratio_idc;
6902 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6904 if( aspect_ratio_info_present_flag ) {
6905 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow. */
6906 if( aspect_ratio_idc == EXTENDED_SAR ) {
6907 sps->sar.num= get_bits(&s->gb, 16);
6908 sps->sar.den= get_bits(&s->gb, 16);
6909 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6910 sps->sar= pixel_aspect[aspect_ratio_idc];
6912 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6919 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6921 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6922 get_bits1(&s->gb); /* overscan_appropriate_flag */
6925 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6926 get_bits(&s->gb, 3); /* video_format */
6927 get_bits1(&s->gb); /* video_full_range_flag */
6928 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6929 get_bits(&s->gb, 8); /* colour_primaries */
6930 get_bits(&s->gb, 8); /* transfer_characteristics */
6931 get_bits(&s->gb, 8); /* matrix_coefficients */
6935 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6936 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6937 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
/* Frame-rate information: time_scale / num_units_in_tick ticks per second. */
6940 sps->timing_info_present_flag = get_bits1(&s->gb);
6941 if(sps->timing_info_present_flag){
6942 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6943 sps->time_scale = get_bits_long(&s->gb, 32);
6944 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6947 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6948 if(sps->nal_hrd_parameters_present_flag)
6949 if(decode_hrd_parameters(h, sps) < 0)
6951 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6952 if(sps->vcl_hrd_parameters_present_flag)
6953 if(decode_hrd_parameters(h, sps) < 0)
6955 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6956 get_bits1(&s->gb); /* low_delay_hrd_flag */
6957 sps->pic_struct_present_flag = get_bits1(&s->gb);
6959 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6960 if(sps->bitstream_restriction_flag){
6961 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6962 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6963 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6964 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6965 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames drives output-delay; cap it to the DPB limit. */
6966 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6967 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6969 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6970 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one quantization scaling list (spec 7.3.2.1.1.1) of `size` 16 or 64
 * entries into `factors`.  If the list is absent, copy `fallback_list`; if a
 * delta of -last appears at index 0, use the JVT default list instead.
 * NOTE(review): interior lines elided in this extract. */
6978 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6979 const uint8_t *jvt_list, const uint8_t *fallback_list){
6980 MpegEncContext * const s = &h->s;
/* last/next start at 8 per the spec's delta-coding scheme. */
6981 int i, last = 8, next = 8;
/* Entries are stored in zig-zag order, 4x4 or 8x8 depending on size. */
6982 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6983 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6984 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6986 for(i=0;i<size;i++){
/* Each entry is delta-coded from the previous, modulo 256. */
6988 next = (last + get_se_golomb(&s->gb)) & 0xff;
6989 if(!i && !next){ /* matrix not written, we use the preset one */
6990 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat previous value" from here on. */
6993 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS (spec 7.3.2.1.1).
 * Fallback order follows the spec: each list falls back either to the SPS
 * list / JVT default (lists 0 and 3) or to the previously decoded list.
 * NOTE(review): interior lines elided in this extract. */
6997 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6998 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6999 MpegEncContext * const s = &h->s;
/* PPS-level lists fall back to the SPS lists if the SPS carried any. */
7000 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7001 const uint8_t *fallback[4] = {
7002 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7003 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7004 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7005 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7007 if(get_bits1(&s->gb)){
/* Remember that the SPS signalled matrices so later PPSes can fall back. */
7008 sps->scaling_matrix_present |= is_sps;
7009 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7010 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7011 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7012 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7013 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7014 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform. */
7015 if(is_sps || pps->transform_8x8_mode){
7016 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7017 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL (spec 7.3.2.1) into a freshly
 * allocated SPS and store it in h->sps_buffers[sps_id].  Validates ranges
 * (sps_id, poc_cycle_length, ref_frame_count, dimensions, cropping) and
 * logs the parsed summary under FF_DEBUG_PICT_INFO.
 * NOTE(review): interior lines (error returns, cleanup) elided here. */
7022 static inline int decode_seq_parameter_set(H264Context *h){
7023 MpegEncContext * const s = &h->s;
7024 int profile_idc, level_idc;
7025 unsigned int sps_id;
7029 profile_idc= get_bits(&s->gb, 8);
7030 get_bits1(&s->gb); //constraint_set0_flag
7031 get_bits1(&s->gb); //constraint_set1_flag
7032 get_bits1(&s->gb); //constraint_set2_flag
7033 get_bits1(&s->gb); //constraint_set3_flag
7034 get_bits(&s->gb, 4); // reserved
7035 level_idc= get_bits(&s->gb, 8);
7036 sps_id= get_ue_golomb_31(&s->gb);
7038 if(sps_id >= MAX_SPS_COUNT) {
7039 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7042 sps= av_mallocz(sizeof(SPS));
7046 sps->profile_idc= profile_idc;
7047 sps->level_idc= level_idc;
/* Default: flat scaling matrices (all 16) until signalled otherwise. */
7049 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7050 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7051 sps->scaling_matrix_present = 0;
7053 if(sps->profile_idc >= 100){ //high profile
7054 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7055 if(sps->chroma_format_idc == 3)
7056 get_bits1(&s->gb); //residual_color_transform_flag
7057 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7058 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7059 sps->transform_bypass = get_bits1(&s->gb);
7060 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are implicitly 4:2:0. */
7062 sps->chroma_format_idc= 1;
7065 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7066 sps->poc_type= get_ue_golomb_31(&s->gb);
7068 if(sps->poc_type == 0){ //FIXME #define
7069 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7070 } else if(sps->poc_type == 1){//FIXME #define
7071 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7072 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7073 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7074 sps->poc_cycle_length = get_ue_golomb(&s->gb);
/* Bound check before filling the fixed-size offset table. */
7076 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7077 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7081 for(i=0; i<sps->poc_cycle_length; i++)
7082 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7083 }else if(sps->poc_type != 2){
7084 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7088 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7089 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7090 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7093 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7094 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7095 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard against 16*mb_width/height overflowing int, then the generic check. */
7096 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7097 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7098 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7102 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7103 if(!sps->frame_mbs_only_flag)
7104 sps->mb_aff= get_bits1(&s->gb);
7108 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7110 #ifndef ALLOW_INTERLACE
7112 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7114 sps->crop= get_bits1(&s->gb);
7116 sps->crop_left = get_ue_golomb(&s->gb);
7117 sps->crop_right = get_ue_golomb(&s->gb);
7118 sps->crop_top = get_ue_golomb(&s->gb);
7119 sps->crop_bottom= get_ue_golomb(&s->gb);
/* Left/top cropping is only partially supported by this decoder. */
7120 if(sps->crop_left || sps->crop_top){
7121 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7123 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7124 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7130 sps->crop_bottom= 0;
7133 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7134 if( sps->vui_parameters_present_flag )
7135 decode_vui_parameters(h, sps);
7137 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7138 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7139 sps_id, sps->profile_idc, sps->level_idc,
7141 sps->ref_frame_count,
7142 sps->mb_width, sps->mb_height,
7143 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7144 sps->direct_8x8_inference_flag ? "8B8" : "",
7145 sps->crop_left, sps->crop_right,
7146 sps->crop_top, sps->crop_bottom,
7147 sps->vui_parameters_present_flag ? "VUI" : "",
7148 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7151 av_free(h->sps_buffers[sps_id]);
7152 h->sps_buffers[sps_id]= sps;
/* Build the luma-QP -> chroma-QP lookup table for chroma plane `t` of a PPS,
 * applying the chroma_qp_index_offset `index` and clamping to 0..51 before
 * indexing the global chroma_qp mapping (spec Table 8-15). */
7160 build_qp_table(PPS *pps, int t, int index)
7163 for(i = 0; i < 52; i++)
7164 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL (spec 7.3.2.2) into a freshly allocated
 * PPS and store it in h->pps_buffers[pps_id].  FMO slice-group maps are not
 * supported (only logged); optional trailing fields (8x8 transform mode,
 * PPS scaling matrices, second chroma QP offset) are parsed if bits remain.
 * NOTE(review): interior lines (error returns, cleanup) elided here. */
7167 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7168 MpegEncContext * const s = &h->s;
7169 unsigned int pps_id= get_ue_golomb(&s->gb);
7172 if(pps_id >= MAX_PPS_COUNT) {
7173 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7177 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been decoded. */
7180 pps->sps_id= get_ue_golomb_31(&s->gb);
7181 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7182 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7186 pps->cabac= get_bits1(&s->gb);
7187 pps->pic_order_present= get_bits1(&s->gb);
7188 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7189 if(pps->slice_group_count > 1 ){
7190 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7191 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* The switch below only documents the unparsed FMO syntax (spec table). */
7192 switch(pps->mb_slice_group_map_type){
7195 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7196 | run_length[ i ] |1 |ue(v) |
7201 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7203 | top_left_mb[ i ] |1 |ue(v) |
7204 | bottom_right_mb[ i ] |1 |ue(v) |
7212 | slice_group_change_direction_flag |1 |u(1) |
7213 | slice_group_change_rate_minus1 |1 |ue(v) |
7218 | slice_group_id_cnt_minus1 |1 |ue(v) |
7219 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7221 | slice_group_id[ i ] |1 |u(v) |
7226 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7227 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7228 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7229 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7233 pps->weighted_pred= get_bits1(&s->gb);
7234 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7235 pps->init_qp= get_se_golomb(&s->gb) + 26;
7236 pps->init_qs= get_se_golomb(&s->gb) + 26;
7237 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7238 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7239 pps->constrained_intra_pred= get_bits1(&s->gb);
7240 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7242 pps->transform_8x8_mode= 0;
7243 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS scaling matrices; PPS ones may override below. */
7244 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7245 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Optional trailing syntax: only present if bits remain before RBSP end. */
7247 if(get_bits_count(&s->gb) < bit_length){
7248 pps->transform_8x8_mode= get_bits1(&s->gb);
7249 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7250 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7252 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7255 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7256 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7257 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7258 h->pps.chroma_qp_diff= 1;
7260 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7261 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7262 pps_id, pps->sps_id,
7263 pps->cabac ? "CABAC" : "CAVLC",
7264 pps->slice_group_count,
7265 pps->ref_count[0], pps->ref_count[1],
7266 pps->weighted_pred ? "weighted" : "",
7267 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7268 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7269 pps->constrained_intra_pred ? "CONSTR" : "",
7270 pps->redundant_pic_cnt_present ? "REDU" : "",
7271 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7275 av_free(h->pps_buffers[pps_id]);
7276 h->pps_buffers[pps_id]= pps;
7284 * Call decode_slice() for each context.
7286 * @param h h264 master context
7287 * @param context_count number of contexts to execute
/* Run decode_slice() on every queued slice context.
 * NOTE(review): this excerpt is missing several original lines (declarations
 * of i/hx, the VDPAU early-return body, some braces); code kept verbatim. */
7289 static void execute_decode_slices(H264Context *h, int context_count){
7290 MpegEncContext * const s = &h->s;
7291 AVCodecContext * const avctx= s->avctx;
/* VDPAU offloads slice decoding to hardware -- presumably returns early
 * here (the statement body is absent from this excerpt; confirm upstream). */
7295 if(avctx->codec_id == CODEC_ID_H264_VDPAU)
/* Single context: decode directly on the master context, no threading. */
7297 if(context_count == 1) {
7298 decode_slice(avctx, &h);
/* Multiple contexts: reset per-thread error state, then let
 * avctx->execute() run decode_slice over all thread contexts. */
7300 for(i = 1; i < context_count; i++) {
7301 hx = h->thread_context[i];
7302 hx->s.error_recognition = avctx->error_recognition;
7303 hx->s.error_count = 0;
7306 avctx->execute(avctx, (void *)decode_slice,
7307 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7309 /* pull back stuff from slices to master context */
7310 hx = h->thread_context[context_count - 1];
7311 s->mb_x = hx->s.mb_x;
7312 s->mb_y = hx->s.mb_y;
7313 s->dropable = hx->s.dropable;
7314 s->picture_structure = hx->s.picture_structure;
/* Fold per-thread error counts back into the master context. */
7315 for(i = 1; i < context_count; i++)
7316 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each one to the
 * appropriate parser (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
 * Slices are queued onto thread contexts and flushed in batches through
 * execute_decode_slices().
 * NOTE(review): this excerpt is sampled -- many original lines (variable
 * declarations, braces, case labels, returns) are missing; all code below
 * is kept byte-identical to the fragment. */
7321 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7322 MpegEncContext * const s = &h->s;
7323 AVCodecContext * const avctx= s->avctx;
7325 H264Context *hx; ///< thread context
7326 int context_count = 0;
7328 h->max_contexts = avctx->thread_count;
/* Debug hex dump of the first 50 input bytes (guard lines not visible). */
7331 for(i=0; i<50; i++){
7332 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Outside CHUNKS mode each call starts a fresh picture. */
7335 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7336 h->current_slice = 0;
7337 if (!s->first_field)
7338 s->current_picture_ptr= NULL;
7350 if(buf_index >= buf_size) break;
/* AVC (length-prefixed) framing: read nal_length_size bytes big-endian. */
7352 for(i = 0; i < h->nal_length_size; i++)
7353 nalsize = (nalsize << 8) | buf[buf_index++];
7354 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7359 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7364 // start code prefix search
7365 for(; buf_index + 3 < buf_size; buf_index++){
7366 // This should always succeed in the first iteration.
7367 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7371 if(buf_index+3 >= buf_size) break;
/* Pick the next free slice context and unescape the NAL payload. */
7376 hx = h->thread_context[context_count];
7378 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7379 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes, then compute the exact RBSP bit length. */
7382 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7384 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7386 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7387 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7390 if (h->is_avc && (nalsize != consumed)){
7391 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7395 buf_index += consumed;
/* Skip non-reference NALs when hurry_up / skip_frame asks for it. */
7397 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7398 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7403 switch(hx->nal_unit_type){
/* IDR slice: an IDR must not be mixed with non-IDR slices. */
7405 if (h->nal_unit_type != NAL_IDR_SLICE) {
7406 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7409 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular slice: non-partitioned, both intra and inter read from s.gb. */
7411 init_get_bits(&hx->s.gb, ptr, bit_length);
7413 hx->inter_gb_ptr= &hx->s.gb;
7414 hx->s.data_partitioning = 0;
7416 if((err = decode_slice_header(hx, h)))
7419 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Only count the slice if it survives all the skip/discard filters. */
7420 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7421 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7422 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7423 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7424 && avctx->skip_frame < AVDISCARD_ALL){
7425 if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
/* VDPAU wants raw Annex-B data: re-add the start code + escaped bytes. */
7426 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7427 ff_VDPAU_h264_add_data_chunk(h, start_code, sizeof(start_code));
7428 ff_VDPAU_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
/* Data partition A: carries the slice header; B/C follow separately. */
7434 init_get_bits(&hx->s.gb, ptr, bit_length);
7436 hx->inter_gb_ptr= NULL;
7437 hx->s.data_partitioning = 1;
7439 err = decode_slice_header(hx, h);
/* Data partition B: intra coefficients. */
7442 init_get_bits(&hx->intra_gb, ptr, bit_length);
7443 hx->intra_gb_ptr= &hx->intra_gb;
/* Data partition C: inter coefficients. */
7446 init_get_bits(&hx->inter_gb, ptr, bit_length);
7447 hx->inter_gb_ptr= &hx->inter_gb;
/* After DPC arrives, queue the partitioned slice if it passes filters. */
7449 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7450 && s->context_initialized
7452 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7453 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7454 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7455 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI NAL (handler call missing from excerpt). */
7459 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set. */
7463 init_get_bits(&s->gb, ptr, bit_length);
7464 decode_seq_parameter_set(h);
7466 if(s->flags& CODEC_FLAG_LOW_DELAY)
7469 if(avctx->has_b_frames < 2)
7470 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set. */
7473 init_get_bits(&s->gb, ptr, bit_length);
7475 decode_picture_parameter_set(h, bit_length);
7479 case NAL_END_SEQUENCE:
7480 case NAL_END_STREAM:
7481 case NAL_FILLER_DATA:
7483 case NAL_AUXILIARY_SLICE:
7486 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush a full batch of queued slice contexts. */
7489 if(context_count == h->max_contexts) {
7490 execute_decode_slices(h, context_count);
7495 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7497 /* Slice could not be decoded in parallel mode, copy down
7498 * NAL unit stuff to context 0 and restart. Note that
7499 * rbsp_buffer is not transferred, but since we no longer
7500 * run in parallel mode this should not be an issue. */
7501 h->nal_unit_type = hx->nal_unit_type;
7502 h->nal_ref_idc = hx->nal_ref_idc;
/* Flush whatever slices remain queued at end of buffer. */
7508 execute_decode_slices(h, context_count);
7513 * returns the number of bytes consumed for building the current frame
/* Clamp/sanitize the parser position and report how many input bytes were
 * consumed for the current frame.
 * NOTE(review): the final `return pos;` and closing brace are among the
 * lines missing from this excerpt. */
7515 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7516 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7517 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level libavcodec decode callback: parse extradata (avcC) on first
 * use, decode the NAL units of one packet, then reorder and emit one
 * picture in display order through *pict / *data_size.
 * NOTE(review): this excerpt is sampled -- many original lines (locals,
 * braces, returns) are missing; all code below is kept byte-identical. */
7522 static int decode_frame(AVCodecContext *avctx,
7523 void *data, int *data_size,
7524 const uint8_t *buf, int buf_size)
7526 H264Context *h = avctx->priv_data;
7527 MpegEncContext *s = &h->s;
7528 AVFrame *pict = data;
7531 s->flags= avctx->flags;
7532 s->flags2= avctx->flags2;
7534 /* end of stream, output what is still in the buffers */
7535 if (buf_size == 0) {
7539 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC for output. */
7540 out = h->delayed_pic[0];
7542 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7543 if(h->delayed_pic[i]->poc < out->poc){
7544 out = h->delayed_pic[i];
/* Compact the delayed-picture list after removing the output picture. */
7548 for(i=out_idx; h->delayed_pic[i]; i++)
7549 h->delayed_pic[i] = h->delayed_pic[i+1];
7552 *data_size = sizeof(AVFrame);
7553 *pict= *(AVFrame*)out;
/* One-time parse of the avcC extradata box (AVC/MP4 framing). */
7559 if(h->is_avc && !h->got_avcC) {
7560 int i, cnt, nalsize;
7561 unsigned char *p = avctx->extradata;
7562 if(avctx->extradata_size < 7) {
7563 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7567 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7570 /* sps and pps in the avcC always have length coded with 2 bytes,
7571 so put a fake nal_length_size = 2 while parsing them */
7572 h->nal_length_size = 2;
7573 // Decode sps from avcC
7574 cnt = *(p+5) & 0x1f; // Number of sps
7576 for (i = 0; i < cnt; i++) {
7577 nalsize = AV_RB16(p) + 2;
7578 if(decode_nal_units(h, p, nalsize) < 0) {
7579 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7584 // Decode pps from avcC
7585 cnt = *(p++); // Number of pps
7586 for (i = 0; i < cnt; i++) {
7587 nalsize = AV_RB16(p) + 2;
7588 if(decode_nal_units(h, p, nalsize) != nalsize) {
7589 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7594 // Now store right nal length size, that will be use to parse all other nals
7595 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7596 // Do not reparse avcC
/* Annex-B extradata (non-avcC) is fed through the normal NAL parser. */
7600 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7601 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7606 buf_index=decode_nal_units(h, buf, buf_size);
7610 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7611 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7612 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A full picture is complete (or CHUNKS mode reached bottom row):
 * finish it, derive interlacing metadata, and reorder for output. */
7616 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7617 Picture *out = s->current_picture_ptr;
7618 Picture *cur = s->current_picture_ptr;
7619 int i, pics, cross_idr, out_of_order, out_idx;
7623 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7624 s->current_picture_ptr->pict_type= s->pict_type;
/* Apply memory-management control operations and roll POC state. */
7627 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7628 h->prev_poc_msb= h->poc_msb;
7629 h->prev_poc_lsb= h->poc_lsb;
7631 h->prev_frame_num_offset= h->frame_num_offset;
7632 h->prev_frame_num= h->frame_num;
7634 if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
7635 ff_VDPAU_h264_picture_complete(h);
7638 * FIXME: Error handling code does not seem to support interlaced
7639 * when slices span multiple rows
7640 * The ff_er_add_slice calls don't work right for bottom
7641 * fields; they cause massive erroneous error concealing
7642 * Error marking covers both fields (top and bottom).
7643 * This causes a mismatched s->error_count
7644 * and a bad error table. Further, the error count goes to
7645 * INT_MAX when called for bottom field, because mb_y is
7646 * past end by one (callers fault) and resync_mb_y != 0
7647 * causes problems for the first MB line, too.
7649 if (avctx->codec_id != CODEC_ID_H264_VDPAU && !FIELD_PICTURE)
7654 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7655 /* Wait for second field. */
7659 cur->repeat_pict = 0;
7661 /* Signal interlacing information externally. */
7662 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7663 if(h->sps.pic_struct_present_flag){
7664 switch (h->sei_pic_struct)
7666 case SEI_PIC_STRUCT_FRAME:
7667 cur->interlaced_frame = 0;
7669 case SEI_PIC_STRUCT_TOP_FIELD:
7670 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7671 case SEI_PIC_STRUCT_TOP_BOTTOM:
7672 case SEI_PIC_STRUCT_BOTTOM_TOP:
7673 cur->interlaced_frame = 1;
7675 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7676 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7677 // Signal the possibility of telecined film externally (pic_struct 5,6)
7678 // From these hints, let the applications decide if they apply deinterlacing.
7679 cur->repeat_pict = 1;
7680 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7682 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7683 // Force progressive here, as doubling interlaced frame is a bad idea.
7684 cur->interlaced_frame = 0;
7685 cur->repeat_pict = 2;
7687 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7688 cur->interlaced_frame = 0;
7689 cur->repeat_pict = 4;
7693 /* Derive interlacing flag from used decoding process. */
7694 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7697 if (cur->field_poc[0] != cur->field_poc[1]){
7698 /* Derive top_field_first from field pocs. */
7699 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7701 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7702 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7703 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7704 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7705 cur->top_field_first = 1;
7707 cur->top_field_first = 0;
7709 /* Most likely progressive */
7710 cur->top_field_first = 0;
7714 //FIXME do something with unavailable reference frames
7716 /* Sort B-frames into display order */
7718 if(h->sps.bitstream_restriction_flag
7719 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7720 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without bitstream_restriction we must assume the worst-case delay. */
7724 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7725 && !h->sps.bitstream_restriction_flag){
7726 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7731 while(h->delayed_pic[pics]) pics++;
7733 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Queue the finished picture; keep a reference so it isn't recycled. */
7735 h->delayed_pic[pics++] = cur;
7736 if(cur->reference == 0)
7737 cur->reference = DELAYED_PIC_REF;
/* Select the delayed picture with the smallest POC for output. */
7739 out = h->delayed_pic[0];
7741 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7742 if(h->delayed_pic[i]->poc < out->poc){
7743 out = h->delayed_pic[i];
7746 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7748 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames heuristically when out-of-order output is detected. */
7750 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7752 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7754 ((!cross_idr && out->poc > h->outputed_poc + 2)
7755 || cur->pict_type == FF_B_TYPE)))
7758 s->avctx->has_b_frames++;
/* Release and dequeue the chosen picture once the delay is satisfied. */
7761 if(out_of_order || pics > s->avctx->has_b_frames){
7762 out->reference &= ~DELAYED_PIC_REF;
7763 for(i=out_idx; h->delayed_pic[i]; i++)
7764 h->delayed_pic[i] = h->delayed_pic[i+1];
7766 if(!out_of_order && pics > s->avctx->has_b_frames){
7767 *data_size = sizeof(AVFrame);
7769 h->outputed_poc = out->poc;
7770 *pict= *(AVFrame*)out;
7772 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7777 assert(pict->data[0] || !*data_size);
7778 ff_print_debug_info(s, pict);
7779 //printf("out %d\n", (int)pict->data[0]);
7782 /* Return the Picture timestamp as the frame number */
7783 /* we subtract 1 because it is added on utils.c */
7784 avctx->frame_number = s->picture_number - 1;
7786 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with availability flags for the neighbours of the
 * current macroblock: a neighbour counts as available only if it lies
 * inside the picture AND belongs to the same slice (slice_table match).
 * Index layout visible here: 0=top-left, 1=top, 2=top-right, 3=left.
 * NOTE(review): the excerpt skips lines 7797-7801 -- presumably the
 * else-branch for the first MB row (s->mb_y==0) -- confirm upstream. */
7789 static inline void fill_mb_avail(H264Context *h){
7790 MpegEncContext * const s = &h->s;
7791 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7794 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7795 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7796 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7802 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7803 h->mb_avail[4]= 1; //FIXME move out
7804 h->mb_avail[5]= 0; //FIXME move out
/* Stand-alone self-test harness (Exp-Golomb round-trip, 4x4 (I)DCT error
 * measurement, quantizer, and NAL escape/unescape round-trip).
 * NOTE(review): the enclosing function header (presumably main()) and many
 * interior lines are missing from this excerpt; code kept verbatim. */
7812 #define SIZE (COUNT*40)
7818 // int int_temp[10000];
7820 AVCodecContext avctx;
7822 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb write/read round-trip --- */
7824 init_put_bits(&pb, temp, SIZE);
7825 printf("testing unsigned exp golomb\n");
7826 for(i=0; i<COUNT; i++){
7828 set_ue_golomb(&pb, i);
7829 STOP_TIMER("set_ue_golomb");
7831 flush_put_bits(&pb);
7833 init_get_bits(&gb, temp, 8*SIZE);
7834 for(i=0; i<COUNT; i++){
7837 s= show_bits(&gb, 24);
7840 j= get_ue_golomb(&gb);
7842 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7845 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb write/read round-trip --- */
7849 init_put_bits(&pb, temp, SIZE);
7850 printf("testing signed exp golomb\n");
7851 for(i=0; i<COUNT; i++){
7853 set_se_golomb(&pb, i - COUNT/2);
7854 STOP_TIMER("set_se_golomb");
7856 flush_put_bits(&pb);
7858 init_get_bits(&gb, temp, 8*SIZE);
7859 for(i=0; i<COUNT; i++){
7862 s= show_bits(&gb, 24);
7865 j= get_se_golomb(&gb);
7866 if(j != i - COUNT/2){
7867 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7870 STOP_TIMER("get_se_golomb");
/* --- forward DCT + IDCT reconstruction error on random blocks --- */
7874 printf("testing 4x4 (I)DCT\n");
7877 uint8_t src[16], ref[16];
7878 uint64_t error= 0, max_error=0;
7880 for(i=0; i<COUNT; i++){
7882 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7883 for(j=0; j<16; j++){
7884 ref[j]= random()%255;
7885 src[j]= random()%255;
7888 h264_diff_dct_c(block, src, ref, 4);
/* Scale coefficients (approximate normalization before the IDCT). */
7891 for(j=0; j<16; j++){
7892 // printf("%d ", block[j]);
7893 block[j]= block[j]*4;
7894 if(j&1) block[j]= (block[j]*4 + 2)/5;
7895 if(j&4) block[j]= (block[j]*4 + 2)/5;
7899 s->dsp.h264_idct_add(ref, block, 4);
7900 /* for(j=0; j<16; j++){
7901 printf("%d ", ref[j]);
7905 for(j=0; j<16; j++){
7906 int diff= FFABS(src[j] - ref[j]);
7909 max_error= FFMAX(max_error, diff);
7912 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7913 printf("testing quantizer\n");
7914 for(qp=0; qp<52; qp++){
7916 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: encode_nal/decode_nal must round-trip exactly --- */
7919 printf("Testing NAL layer\n");
7921 uint8_t bitstream[COUNT];
7922 uint8_t nal[COUNT*2];
7924 memset(&h, 0, sizeof(H264Context));
7926 for(i=0; i<COUNT; i++){
7934 for(j=0; j<COUNT; j++){
7935 bitstream[j]= (random() % 255) + 1;
/* Sprinkle zero bytes so emulation-prevention escaping gets exercised. */
7938 for(j=0; j<zeros; j++){
7939 int pos= random() % COUNT;
7940 while(bitstream[pos] == 0){
7949 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7951 printf("encoding failed\n");
7955 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7959 if(out_length != COUNT){
7960 printf("incorrect length %d %d\n", out_length, COUNT);
7964 if(consumed != nal_length){
7965 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7969 if(memcmp(bitstream, out, COUNT)){
7970 printf("mismatch\n");
7976 printf("Testing RBSP\n");
/* Codec close callback: free per-context buffers, decoder tables, and all
 * stored SPS/PPS parameter sets.
 * NOTE(review): the trailing cleanup lines and return are missing from
 * this excerpt; code kept verbatim. */
7984 static av_cold int decode_end(AVCodecContext *avctx)
7986 H264Context *h = avctx->priv_data;
7987 MpegEncContext *s = &h->s;
7990 av_freep(&h->rbsp_buffer[0]);
7991 av_freep(&h->rbsp_buffer[1]);
7992 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep() takes Type**, nulls each slot after freeing. */
7994 for(i = 0; i < MAX_SPS_COUNT; i++)
7995 av_freep(h->sps_buffers + i);
7997 for(i = 0; i < MAX_PPS_COUNT; i++)
7998 av_freep(h->pps_buffers + i);
8002 // memset(h, 0, sizeof(H264Context));
/* Public software H.264 decoder registration.
 * NOTE(review): most initializer fields (name, type, codec id, callbacks)
 * are among the lines missing from this excerpt; kept verbatim. */
8008 AVCodec h264_decoder = {
8012 sizeof(H264Context),
8017 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8019 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8022 #ifdef CONFIG_H264_VDPAU_DECODER
8023 AVCodec h264_vdpau_decoder = {
8026 CODEC_ID_H264_VDPAU,
8027 sizeof(H264Context),
8032 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8034 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),