/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
88 return (a&0xFFFF) + (b<<16);
/* qp % 6 for qp in 0..51 — selects the dequant scale within a 6-step period. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

/* qp / 6 for qp in 0..51 — the dequant shift (doubles every 6 qp steps). */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/* Scan orders for the left-neighbour 4x4 blocks; rows 1-3 handle the
 * MBAFF frame/field neighbour permutations selected in fill_caches(). */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,0,3,1,11,8,10,9},
    {0,2,1,3,8,11,9,10},
    {2,2,3,3,10,10,11,11}
};

#define LEVEL_TAB_BITS 8
/* Lookup table for fast CAVLC level decoding, filled at init time. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * fill_caches(): loads the neighbouring-macroblock context (intra pred
 * modes, non_zero_count, motion vectors/refs, mvd, direct flags, cbp)
 * into the per-MB 8-wide caches consumed by the decode/deblock code.
 * NOTE(review): this listing is fragmentary — several original lines
 * (blank lines, some braces/else branches) are missing, so the comments
 * below only describe what the visible code demonstrably does.
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
// Default (non-MBAFF) neighbour indices: straight grid neighbours.
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
// MBAFF pair-based neighbour derivation: field/frame flags of the MB
// pairs decide which row of each pair is the true neighbour.
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
// Publish resolved neighbour MB addresses for later users (deblock etc.).
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
// Deblocking path: presumably accepts neighbours from other slices
// (slice_table < 0xFFFF i.e. any valid slice) — TODO confirm against
// the missing surrounding branch.
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// Normal decode path: a neighbour only counts if it belongs to the
// current slice (slice_table == slice_num), otherwise its type is 0.
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra sample availability: bitmasks describing which neighbouring
// sample rows/columns exist for intra prediction; with constrained
// intra pred only intra neighbours qualify (type_mask).
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
// Intra4x4 prediction-mode cache: copy the bottom row of the top
// neighbour and the relevant column of the left neighbour(s).
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// non_zero_count cache: top row / left column of neighbour nnz values;
// unavailable neighbours get 0 (CABAC inter) or 64 otherwise.
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CBP of top/left neighbours (CABAC context) — intra fallback branches
// are partially missing from this listing.
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Motion vector / reference caches for inter & direct MBs: one row from
// the top neighbour, one column per left neighbour, plus the corners.
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
// Top-left corner: topleft_partition selects the mid-MB partition in the
// special MBAFF case flagged above.
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC motion-vector-difference cache, same layout as mv_cache.
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B-slice direct-mode flags from the top / left neighbours.
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF frame<->field remapping of the cached neighbour refs/MVs:
// MAP_MVS expands MAP_F2F over every cached neighbour position, and the
// two MAP_F2F variants halve/double the vertical components when the
// neighbour's field/frame coding differs from the current MB's.
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
// Number of 8x8-DCT-coded neighbours, used for the transform-size CABAC ctx.
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "diagonal" (top-right, falling back to top-left) MV/ref used
 * by the median MV predictor, writing the chosen MV into a spare cache
 * slot (scan8[0]-2) and returning its reference index.
 * NOTE(review): this listing is fragmentary — the MBAFF guard around the
 * SET_DIAG_MV uses and several braces are missing; comments only describe
 * the visible code.
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// Default the spare slot to zero MV before any of the field/frame fixups.
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV: loads the MV/ref at 4x4 coords (X4,Y4) from the current
// picture, scaling the vertical MV and ref index between field and frame
// units (MV_OP / REF_OP), and returns the resulting ref.
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable: fall back to the left/top-left neighbour with
// the appropriate field<->frame MV scaling.
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Non-MBAFF path: use top-right if available, otherwise top-left.
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * Gets the predicted MV (H.264 median MV prediction) from the left (A),
 * top (B) and diagonal (C) neighbours in the caches.
 * NOTE(review): fragmentary listing — the single-match branches for
 * left_ref/C and some closing braces are missing; comments describe only
 * the visible code.
 */
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
// Count how many neighbours use the same reference as this partition.
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
// >1 matches: component-wise median of A, B, C.
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
780 }else if(top_ref==ref){
// No match: left-only special case, otherwise median again.
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * Gets the directionally predicted 16x8 MV: top partition prefers the top
 * neighbour (B), bottom partition prefers the left neighbour (A); falls
 * back to the median predictor otherwise.
 * NOTE(review): fragmentary listing — the n==0/n==4 branch structure and
 * the direct MV assignments are missing here.
 */
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Top partition: top neighbour's ref/MV.
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
// Bottom partition: left neighbour's ref/MV.
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Fallback: ordinary median prediction.
832 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * Gets the directionally predicted 8x16 MV: left partition prefers the
 * left neighbour (A), right partition prefers the diagonal neighbour (C);
 * falls back to the median predictor otherwise.
 * NOTE(review): fragmentary listing — the n==0/n==4 branch structure and
 * the direct MV assignments are missing here.
 */
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
// Left partition: left neighbour's ref/MV.
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
// Right partition: diagonal neighbour fetched for the 8-wide sub-block.
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
// Fallback: ordinary median prediction.
869 pred_motion(h, n, 2, list, ref, mx, my);
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills h->dist_scale_factor (and, for MBAFF, the per-field variant) with
 * temporal-direct scale factors for every list-0 reference.
 * NOTE(review): fragmentary listing — the FRAME_MBAFF guard, loop variable
 * declarations and closing braces are missing here.
 */
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
// POC of the current field/frame and of the first list-1 reference.
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
// Per-field table (presumably the MBAFF path — guard line absent).
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
// Frame-level table for every list-0 reference.
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the co-located reference remap (map_col_to_list0): for each
 * reference of the list-1 colocated picture, finds the current list entry
 * with the same POC-based key and records its index.
 * NOTE(review): fragmentary listing — the non-interlaced poc adjustment
 * branch, the mbafi guard around the +16 store, and closing braces are
 * missing here.
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
// mbafi selects the field half of the map (entries 16..); otherwise frame.
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
// poc keys encode 4*frame_num + reference parity (see direct_ref_list_init).
933 int poc = ref1->ref_poc[colfield][list][old_ref];
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
1061 if(h->direct_spatial_mv_pred){
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1085 for(list=0; list<2; list++){
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1089 mv[list][0] = mv[list][1] = 0;
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1121 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1131 }else if(IS_16X16(*mb_type)){
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1139 && (h->x264_build>33 || !h->x264_build)))){
1141 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1290 ref0 = map_col_to_list0[0][ref0];
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1329 if(!USES_LIST(mb_type, list))
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391 if(i>0 && !src[i]) i--;
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1401 /* startcode, so we must be past the end */
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1423 //printf("decoding esc\n");
1424 memcpy(dst, src, i);
1427 //remove escapes (very rare 1:2^22)
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1437 }else //next start code
1441 dst[di++]= src[si++];
1444 dst[di++]= src[si++];
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 //memset(block, 64, 2*256);
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
1513 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1516 int temp[16]; //FIXME check if this is a good idea
1517 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1518 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1521 const int offset= y_offset[i];
1522 const int z0= block[offset+stride*0] + block[offset+stride*4];
1523 const int z1= block[offset+stride*0] - block[offset+stride*4];
1524 const int z2= block[offset+stride*1] - block[offset+stride*5];
1525 const int z3= block[offset+stride*1] + block[offset+stride*5];
1534 const int offset= x_offset[i];
1535 const int z0= temp[4*0+i] + temp[4*2+i];
1536 const int z1= temp[4*0+i] - temp[4*2+i];
1537 const int z2= temp[4*1+i] - temp[4*3+i];
1538 const int z3= temp[4*1+i] + temp[4*3+i];
1540 block[stride*0 +offset]= (z0 + z3)>>1;
1541 block[stride*2 +offset]= (z1 + z2)>>1;
1542 block[stride*8 +offset]= (z1 - z2)>>1;
1543 block[stride*10+offset]= (z0 - z3)>>1;
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1573 static void chroma_dc_dct_c(DCTELEM *block){
1574 const int stride= 16*2;
1575 const int xStride= 16;
1578 a= block[stride*0 + xStride*0];
1579 b= block[stride*0 + xStride*1];
1580 c= block[stride*1 + xStride*0];
1581 d= block[stride*1 + xStride*1];
1588 block[stride*0 + xStride*0]= (a+c);
1589 block[stride*0 + xStride*1]= (e+b);
1590 block[stride*1 + xStride*0]= (a-c);
1591 block[stride*1 + xStride*1]= (e-b);
1596 * gets the chroma qp.
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
1683 chroma_op= chroma_avg;
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1833 assert(IS_8X8(mb_type));
1836 const int sub_mb_type= h->sub_mb_type[i];
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 assert(IS_SUB_4X4(sub_mb_type));
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
// One-time initialization of all static CAVLC VLC decode tables
// (coeff_token, total_zeros, run and their chroma-DC variants) into the
// preallocated static arrays (INIT_VLC_USE_NEW_STATIC), then builds the
// level table via init_cavlc_level_tab().
// NOTE(review): excerpt elides the body of the 'done' guard, several
// loop headers and the 'offset' declaration.
1907 static av_cold void decode_init_vlc(void){
1908 static int done = 0;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
// the four coeff_token tables are packed back-to-back in one static array;
// 'offset' walks the packed storage
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
// NOTE(review): trailing comma below is the comma operator, not ';'.
// It happens to work (both assignments execute) but should be a semicolon.
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
// Frees all per-context dynamically allocated macroblock tables
// (counterpart of alloc_tables) and the per-thread scratch buffers.
// av_freep() also NULLs each pointer, so a later re-alloc is safe.
// NOTE(review): declarations of 'i'/'hx' and the NULL-check on hx are
// elided from this excerpt.
1978 static void free_tables(H264Context *h){
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
// slice_table points into slice_table_base (offset), so only clear it
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
// per-thread contexts own their own top_borders/scratchpad
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
// Builds the 8x8 dequantization tables for all 52 QP values from the
// PPS scaling matrices (one table per intra/inter matrix). If both
// scaling matrices are identical the second table aliases the first.
// 'transpose' adapts the coefficient order to the IDCT's permutation.
2003 static void init_dequant8_coeff_table(H264Context *h){
2005 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009 for(i=0; i<2; i++ ){
// matrices equal -> share buffer 0 and skip recomputation (elided break?)
2010 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2015 for(q=0; q<52; q++){
// div6[q] gives the QP/6 shift of the dequant formula
2016 int shift = div6[q];
2019 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021 h->pps.scaling_matrix8[i][x]) << shift;
// Builds the 4x4 dequantization tables for all 52 QP values from the six
// PPS 4x4 scaling matrices; identical matrices share one buffer.
// 'transpose' adapts coefficient order to the IDCT's permutation.
// NOTE(review): the inner j-loop header and 'idx' computation are elided.
2026 static void init_dequant4_coeff_table(H264Context *h){
2028 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029 for(i=0; i<6; i++ ){
2030 h->dequant4_coeff[i] = h->dequant4_buffer[i];
// reuse an earlier buffer when the scaling matrices are byte-identical
2032 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2040 for(q=0; q<52; q++){
// +2 compensates the 4x4 transform's different normalization vs 8x8
2041 int shift = div6[q] + 2;
2044 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046 h->pps.scaling_matrix4[i][x]) << shift;
// (Re)builds all dequant tables for the current PPS: always the 4x4
// tables, the 8x8 ones only when 8x8 transform is enabled. With lossless
// transform bypass, QP 0 entries are forced to the identity scale (1<<6).
2051 static void init_dequant_tables(H264Context *h){
2053 init_dequant4_coeff_table(h);
2054 if(h->pps.transform_8x8_mode)
2055 init_dequant8_coeff_table(h);
2056 if(h->sps.transform_bypass){
// 1<<6 == unity gain given the tables' 6-bit fixed-point scaling
2059 h->dequant4_coeff[i][0][x] = 1<<6;
2060 if(h->pps.transform_8x8_mode)
2063 h->dequant8_coeff[i][0][x] = 1<<6;
2070 * needs width/height
// Allocates the per-picture macroblock metadata tables once the frame
// dimensions are known, and precomputes the mb->b/b8 index maps.
// CHECKED_ALLOCZ presumably jumps to an (elided) 'fail' label on OOM,
// where free_tables() cleans up — TODO confirm against the macro.
2072 static int alloc_tables(H264Context *h){
2073 MpegEncContext * const s = &h->s;
// +1 row of padding so neighbour accesses above row 0 stay in bounds
2074 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2077 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2079 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2080 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2081 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
// -1 marks "no slice"; slice_table is offset into the padded base so
// out-of-frame neighbours read the -1 sentinel
2088 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2089 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2091 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
// map each macroblock index to its 4x4 (b) and 8x8 (b8) block indices
2093 for(y=0; y<s->mb_height; y++){
2094 for(x=0; x<s->mb_width; x++){
2095 const int mb_xy= x + y*s->mb_stride;
2096 const int b_xy = 4*x + 4*y*h->b_stride;
2097 const int b8_xy= 2*x + 2*y*h->b8_stride;
2099 h->mb2b_xy [mb_xy]= b_xy;
2100 h->mb2b8_xy[mb_xy]= b8_xy;
// scratchpad is sized per-line and allocated lazily in frame_start()
2104 s->obmc_scratchpad = NULL;
2106 if(!h->dequant4_coeff[0])
2107 init_dequant_tables(h);
2116 * Mimic alloc_tables(), but for every context thread.
// Shares the read-mostly tables allocated by alloc_tables() with a
// slave thread context instead of reallocating them; only the
// per-thread scratchpad stays private (allocated lazily later).
2118 static void clone_tables(H264Context *dst, H264Context *src){
2119 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2120 dst->non_zero_count = src->non_zero_count;
2121 dst->slice_table = src->slice_table;
2122 dst->cbp_table = src->cbp_table;
2123 dst->mb2b_xy = src->mb2b_xy;
2124 dst->mb2b8_xy = src->mb2b8_xy;
2125 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2126 dst->mvd_table[0] = src->mvd_table[0];
2127 dst->mvd_table[1] = src->mvd_table[1];
2128 dst->direct_table = src->direct_table;
// per-thread buffer; allocated on demand in frame_start()
2130 dst->s.obmc_scratchpad = NULL;
2131 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2136 * Allocate buffers which are not shared amongst multiple threads.
// Allocates the buffers private to one thread context: one row of
// deblocking top borders per field (16 luma + 8+8 chroma bytes per MB).
// On allocation failure the (elided) fail path returns -1 and relies on
// free_tables() for cleanup.
2138 static int context_init(H264Context *h){
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2144 return -1; // free_tables will clean up for us
// Initialization shared by all H.264-family contexts: copies geometry
// from the AVCodecContext, sets codec defaults, initializes the intra
// prediction function pointers and DSP, and seeds the PPS scaling
// matrices with the flat default (all 16).
2147 static av_cold void common_init(H264Context *h){
2148 MpegEncContext * const s = &h->s;
2150 s->width = s->avctx->width;
2151 s->height = s->avctx->height;
2152 s->codec_id= s->avctx->codec->id;
2154 ff_h264_pred_init(&h->hpc, s->codec_id);
// -1 == "no PPS dequant tables built yet"
2156 h->dequant_coeff_pps= -1;
2157 s->unrestricted_mv=1;
2158 s->decode=1; //FIXME
2160 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
// flat default scaling lists (value 16 == unity) until a PPS overrides them
2162 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2167 * Reset SEI values at the beginning of the frame.
2169 * @param h H.264 context.
// Resets per-frame SEI state to "not present" defaults before parsing
// a new access unit (-1 sentinels mean "no value signalled").
2171 static void reset_sei(H264Context *h) {
2172 h->sei_recovery_frame_cnt = -1;
2173 h->sei_dpb_output_delay = 0;
2174 h->sei_cpb_removal_delay = -1;
2175 h->sei_buffering_period_present = 0;
// AVCodec init callback: sets up the MpegEncContext defaults, picks the
// output pixel format (VDPAU vs. negotiated), handles avcC-style
// ("mp4-ish", first extradata byte == 1) global headers, and normalizes
// the time base to two ticks per frame for field handling.
2178 static av_cold int decode_init(AVCodecContext *avctx){
2179 H264Context *h= avctx->priv_data;
2180 MpegEncContext * const s = &h->s;
2182 MPV_decode_defaults(s);
2187 s->out_format = FMT_H264;
2188 s->workaround_bugs= avctx->workaround_bugs;
2191 // s->decode_mb= ff_h263_decode_mb;
2192 s->quarter_sample = 1;
2193 if(!avctx->has_b_frames)
2196 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2197 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2199 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2200 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
// first extradata byte 1 => avcC (length-prefixed NALs) rather than Annex B
2204 if(avctx->extradata_size > 0 && avctx->extradata &&
2205 *(char *)avctx->extradata == 1){
2212 h->thread_context[0] = h;
// INT_MIN sentinel: nothing output yet
2213 h->outputed_poc = INT_MIN;
2214 h->prev_poc_msb= 1<<16;
// express the time base in field ticks (2 ticks per frame)
2216 if(avctx->codec_id == CODEC_ID_H264){
2217 if(avctx->ticks_per_frame == 1){
2218 s->avctx->time_base.den *=2;
2220 avctx->ticks_per_frame = 2;
// Per-frame setup: starts the MPV frame and error resilience, clears
// the key_frame flag (IDR marking happens later per slice), computes the
// block_offset tables for frame and field layouts, lazily allocates the
// per-thread scratchpads, and resets the slice table / POC state.
2225 static int frame_start(H264Context *h){
2226 MpegEncContext * const s = &h->s;
2229 if(MPV_frame_start(s, s->avctx) < 0)
2231 ff_er_frame_start(s);
2233 * MPV_frame_start uses pict_type to derive key_frame.
2234 * This is incorrect for H.264; IDR markings must be used.
2235 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2236 * See decode_nal_units().
2238 s->current_picture_ptr->key_frame= 0;
2240 assert(s->linesize && s->uvlinesize);
// block_offset[0..15]: luma offsets for frame MBs; [24..39]: field MBs
// (doubled line stride via the 8* factor)
2242 for(i=0; i<16; i++){
2243 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2244 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
// [16..23]: chroma (cb/cr share the same offsets); [40..47]: field chroma
2247 h->block_offset[16+i]=
2248 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249 h->block_offset[24+16+i]=
2250 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2253 /* can't be in alloc_tables because linesize isn't known there.
2254 * FIXME: redo bipred weight to not require extra buffer? */
2255 for(i = 0; i < s->avctx->thread_count; i++)
2256 if(!h->thread_context[i]->s.obmc_scratchpad)
2257 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2259 /* some macroblocks will be accessed before they're available */
2260 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2261 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2263 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2265 // We mark the current picture as non-reference after allocating it, so
2266 // that if we break out due to an error it can be released automatically
2267 // in the next MPV_frame_start().
2268 // SVQ3 as well as most other codecs have only last/next/current and thus
2269 // get released even with set reference, besides SVQ3 and others do not
2270 // mark frames as reference later "naturally".
2271 if(s->codec_id != CODEC_ID_SVQ3)
2272 s->current_picture_ptr->reference= 0;
// INT_MAX sentinel: field POCs are filled in once known
2274 s->current_picture_ptr->field_poc[0]=
2275 s->current_picture_ptr->field_poc[1]= INT_MAX;
2276 assert(s->current_picture_ptr->long_ref==0);
// Saves the bottom row and right-side column of the just-decoded
// macroblock into top_borders[]/left_border[] so the deblocking filter
// of neighbouring MBs can read pre-filter pixels. MBAFF pairs keep two
// saved lines (one per field) selected via MB_MBAFF offsets.
// NOTE(review): several lines (skiplast/top_idx/step defaults, braces)
// are elided in this excerpt.
2281 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2282 MpegEncContext * const s = &h->s;
2291 src_cb -= uvlinesize;
2292 src_cr -= uvlinesize;
2294 if(!simple && FRAME_MBAFF){
// MBAFF: select field-specific slots in left_border
2296 offset = MB_MBAFF ? 1 : 17;
2297 uvoffset= MB_MBAFF ? 1 : 9;
// save the last luma line (row 15) of the MB pair's top field
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2301 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2308 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2309 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2310 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2311 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2317 top_idx = MB_MBAFF ? 0 : 1;
2319 step= MB_MBAFF ? 2 : 1;
2322 // There are two lines saved, the line above the the top macroblock of a pair,
2323 // and the line above the bottom macroblock
// save the MB's rightmost luma column for the left neighbour's filter
2324 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2325 for(i=1; i<17 - skiplast; i++){
2326 h->left_border[offset+i*step]= src_y[15+i* linesize];
// save the MB's bottom luma line for the MB below
2329 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2330 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2332 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
// same for the two chroma planes (offsets 34 and 34+18 in left_border)
2333 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2334 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2335 for(i=1; i<9 - skiplast; i++){
2336 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2337 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2339 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2340 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
// Swaps (xchg=1) or restores (xchg=0) the saved neighbour border pixels
// with the frame buffer around the current MB, so intra prediction sees
// unfiltered neighbours while deblocking is enabled. deblock_left/top
// control which edges actually need the exchange (slice boundaries when
// deblocking_filter==2).
// NOTE(review): some declarations and else-branches are elided here.
2344 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2345 MpegEncContext * const s = &h->s;
2356 if(!simple && FRAME_MBAFF){
2358 offset = MB_MBAFF ? 1 : 17;
2359 uvoffset= MB_MBAFF ? 1 : 9;
2363 top_idx = MB_MBAFF ? 0 : 1;
2365 step= MB_MBAFF ? 2 : 1;
// mode 2: filter only inside the current slice
2368 if(h->deblocking_filter == 2) {
2370 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2371 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2373 deblock_left = (s->mb_x > 0);
2374 deblock_top = (s->mb_y > !!MB_FIELD);
// step back one line/column so src covers the top-left border pixel too
2377 src_y -= linesize + 1;
2378 src_cb -= uvlinesize + 1;
2379 src_cr -= uvlinesize + 1;
2381 #define XCHG(a,b,t,xchg)\
// luma left edge
2388 for(i = !deblock_top; i<16; i++){
2389 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2391 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
// luma top edge (plus the first 8 pixels of the right neighbour's row)
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2397 if(s->mb_x+1 < s->mb_width){
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
// chroma edges (skipped in gray-only decoding)
2402 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2404 for(i = !deblock_top; i<8; i++){
2405 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2406 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2408 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2409 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2412 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
// Core macroblock reconstruction: computes destination pointers, handles
// field (MBAFF) addressing, then reconstructs the MB — intra PCM copy,
// intra prediction + residual IDCT, or inter motion compensation — for
// luma and chroma, and finally runs the deblocking filter. 'simple'
// selects a template specialization with the uncommon paths (MBAFF,
// gray, SVQ3, PCM, lossless) compiled out.
// NOTE(review): numerous lines are elided from this excerpt; treat the
// control flow here as indicative only.
2418 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2419 MpegEncContext * const s = &h->s;
2420 const int mb_x= s->mb_x;
2421 const int mb_y= s->mb_y;
2422 const int mb_xy= h->mb_xy;
2423 const int mb_type= s->current_picture.mb_type[mb_xy];
2424 uint8_t *dest_y, *dest_cb, *dest_cr;
2425 int linesize, uvlinesize /*dct_offset*/;
2427 int *block_offset = &h->block_offset[0];
2428 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2429 /* is_h264 should always be true if SVQ3 is disabled. */
2430 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2431 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2432 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// destination of this MB in the current picture (16x16 luma, 8x8 chroma)
2434 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2435 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2436 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2438 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2439 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
// field macroblock: double the strides and use the field block_offset set
2441 if (!simple && MB_FIELD) {
2442 linesize = h->mb_linesize = s->linesize * 2;
2443 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2444 block_offset = &h->block_offset[24];
2445 if(mb_y&1){ //FIXME move out of this function?
2446 dest_y -= s->linesize*15;
2447 dest_cb-= s->uvlinesize*7;
2448 dest_cr-= s->uvlinesize*7;
// remap reference indices to the field parity for the cache
2452 for(list=0; list<h->list_count; list++){
2453 if(!USES_LIST(mb_type, list))
2455 if(IS_16X16(mb_type)){
2456 int8_t *ref = &h->ref_cache[list][scan8[0]];
2457 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2459 for(i=0; i<16; i+=4){
2460 int ref = h->ref_cache[list][scan8[i]];
2462 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2468 linesize = h->mb_linesize = s->linesize;
2469 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2470 // dct_offset = s->linesize * 16;
// I_PCM: raw samples were stored in h->mb; copy them straight out
2473 if (!simple && IS_INTRA_PCM(mb_type)) {
2474 for (i=0; i<16; i++) {
2475 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2477 for (i=0; i<8; i++) {
2478 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2479 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
// intra MB: borders may need swapping so prediction sees unfiltered pixels
2482 if(IS_INTRA(mb_type)){
2483 if(h->deblocking_filter)
2484 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2486 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2487 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2488 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2491 if(IS_INTRA4x4(mb_type)){
2492 if(simple || !s->encoding){
// 8x8 intra prediction + 8x8 transform residuals
2493 if(IS_8x8DCT(mb_type)){
2494 if(transform_bypass){
2496 idct_add = s->dsp.add_pixels8;
2498 idct_dc_add = s->dsp.h264_idct8_dc_add;
2499 idct_add = s->dsp.h264_idct8_add;
2501 for(i=0; i<16; i+=4){
2502 uint8_t * const ptr= dest_y + block_offset[i];
2503 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2504 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2505 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2507 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2508 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2509 (h->topright_samples_available<<i)&0x4000, linesize);
// DC-only block uses the cheaper dc_add path
2511 if(nnz == 1 && h->mb[i*16])
2512 idct_dc_add(ptr, h->mb + i*16, linesize);
2514 idct_add (ptr, h->mb + i*16, linesize);
// 4x4 intra prediction + 4x4 transform residuals
2519 if(transform_bypass){
2521 idct_add = s->dsp.add_pixels4;
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
2526 for(i=0; i<16; i++){
2527 uint8_t * const ptr= dest_y + block_offset[i];
2528 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2530 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2531 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
// modes needing top-right samples: replicate the last top pixel if absent
2535 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2536 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2537 assert(mb_y || linesize <= block_offset[i]);
2538 if(!topright_avail){
2539 tr= ptr[3 - linesize]*0x01010101;
2540 topright= (uint8_t*) &tr;
2542 topright= ptr + 4 - linesize;
2546 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2547 nnz = h->non_zero_count_cache[ scan8[i] ];
2550 if(nnz == 1 && h->mb[i*16])
2551 idct_dc_add(ptr, h->mb + i*16, linesize);
2553 idct_add (ptr, h->mb + i*16, linesize);
2555 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: predict whole MB, then dequant/IDCT the luma DC plane
2562 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2564 if(!transform_bypass)
2565 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2567 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2569 if(h->deblocking_filter)
2570 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// inter MB: motion compensation (elided else branch)
2572 hl_motion(h, dest_y, dest_cb, dest_cr,
2573 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2574 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2575 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
// add the luma residuals (intra4x4 already added them above)
2579 if(!IS_INTRA4x4(mb_type)){
2581 if(IS_INTRA16x16(mb_type)){
2582 if(transform_bypass){
2583 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2584 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2586 for(i=0; i<16; i++){
2587 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2588 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2592 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2594 }else if(h->cbp&15){
2595 if(transform_bypass){
2596 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2597 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2598 for(i=0; i<16; i+=di){
2599 if(h->non_zero_count_cache[ scan8[i] ]){
2600 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2604 if(IS_8x8DCT(mb_type)){
2605 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2607 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
// SVQ3 residual path (elided else branch)
2612 for(i=0; i<16; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2614 uint8_t * const ptr= dest_y + block_offset[i];
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// chroma residuals: cbp&0x30 means any chroma coefficients present
2621 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2622 uint8_t *dest[2] = {dest_cb, dest_cr};
2623 if(transform_bypass){
2624 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2625 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2626 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2628 idct_add = s->dsp.add_pixels4;
2629 for(i=16; i<16+8; i++){
2630 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2631 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
// dequant the 2x2 chroma DC planes per chroma QP, then add residuals
2635 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2636 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2638 idct_add = s->dsp.h264_idct_add;
2639 idct_dc_add = s->dsp.h264_idct_dc_add;
2640 for(i=16; i<16+8; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2647 for(i=16; i<16+8; i++){
2648 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2649 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2650 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2657 if(h->cbp || IS_INTRA(mb_type))
2658 s->dsp.clear_blocks(h->mb);
// deblocking: back up borders, refresh caches/QPs, then filter this MB
2660 if(h->deblocking_filter) {
2661 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2662 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2663 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2664 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2665 if (!simple && FRAME_MBAFF) {
2666 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2668 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2674 * Process a macroblock; this case avoids checks for expensive uncommon cases.
// Fast-path specialization: compiles hl_decode_mb_internal with
// simple=1, eliding MBAFF/gray/SVQ3/PCM handling.
2676 static void hl_decode_mb_simple(H264Context *h){
2677 hl_decode_mb_internal(h, 1);
2681 * Process a macroblock; this handles edge cases, such as interlacing.
// Full-featured specialization (simple=0): handles interlacing, PCM and
// the other uncommon cases. noinline keeps the large body out of callers.
2683 static void av_noinline hl_decode_mb_complex(H264Context *h){
2684 hl_decode_mb_internal(h, 0);
// Dispatches reconstruction of the current macroblock to the simple or
// complex variant. CONFIG_SMALL builds always take the complex path to
// avoid duplicating the template.
2687 static void hl_decode_mb(H264Context *h){
2688 MpegEncContext * const s = &h->s;
2689 const int mb_xy= h->mb_xy;
2690 const int mb_type= s->current_picture.mb_type[mb_xy];
// qscale==0 may mean lossless bypass, which the simple path cannot handle
2691 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2694 hl_decode_mb_complex(h);
2695 else hl_decode_mb_simple(h);
// Converts a frame Picture (in place) into a single-field view with the
// given parity: bottom field starts one line down, strides are doubled,
// reference/poc are set to the field's values.
2698 static void pic_as_field(Picture *pic, const int parity){
2700 for (i = 0; i < 4; ++i) {
2701 if (parity == PICT_BOTTOM_FIELD)
2702 pic->data[i] += pic->linesize[i];
2703 pic->reference = parity;
2704 pic->linesize[i] *= 2;
2706 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
// Copies src into dest if src is usable as a reference with the given
// parity; for field parities the copy is converted to a field picture
// and its pic_id adjusted by id_add. Returns whether the copy was made.
// NOTE(review): the actual copy assignment and return lines are elided
// from this excerpt.
2709 static int split_field_copy(Picture *dest, Picture *src,
2710 int parity, int id_add){
2711 int match = !!(src->reference & parity);
2715 if(parity != PICT_FRAME){
2716 pic_as_field(dest, parity);
2718 dest->pic_id += id_add;
// Builds (part of) a default reference list: interleaves pictures whose
// reference parity matches 'sel' with those matching the opposite
// parity (sel^3), assigning pic_ids (long-term index or frame_num).
// Returns the number of entries written (return elided in excerpt).
2725 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2729 while(i[0]<len || i[1]<len){
// advance each cursor to the next picture usable with its parity
2730 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2732 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2735 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2736 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2739 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2740 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
// Selection-sorts src pictures by POC into 'sorted': dir==0 picks
// descending POCs below 'limit', dir==1 picks ascending POCs above it
// (used to order B-frame reference lists around the current POC).
// Returns the number of entries emitted (return elided in excerpt).
2747 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2752 best_poc= dir ? INT_MIN : INT_MAX;
2754 for(i=0; i<len; i++){
2755 const int poc= src[i]->poc;
// XOR with dir flips both comparisons for the ascending case
2756 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2758 sorted[out_i]= src[i];
2761 if(best_poc == (dir ? INT_MIN : INT_MAX))
2763 limit= sorted[out_i++]->poc - dir;
2769 * fills the default_ref_list.
// Builds the default (unreordered) reference picture lists. For B
// slices: L0 = past refs (descending POC) then future (ascending), L1
// mirrored; long-term refs appended; if both lists come out identical
// the first two L1 entries are swapped per the spec. For P slices a
// single list ordered by short-then-long term is built.
2771 static int fill_default_ref_list(H264Context *h){
2772 MpegEncContext * const s = &h->s;
2775 if(h->slice_type_nos==FF_B_TYPE){
2776 Picture *sorted[32];
// use the current field's POC when decoding fields
2781 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2783 cur_poc= s->current_picture_ptr->poc;
2785 for(list= 0; list<2; list++){
// list0: past first; list1: future first (1^list / 0^list flips dir)
2786 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2787 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2789 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2790 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2793 if(len < h->ref_count[list])
2794 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
// spec: if L1 == L0 and it has >1 entry, swap the first two L1 entries
2798 if(lens[0] == lens[1] && lens[1] > 1){
2799 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2801 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2804 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2805 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2807 if(len < h->ref_count[0])
2808 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2811 for (i=0; i<h->ref_count[0]; i++) {
2812 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2814 if(h->slice_type_nos==FF_B_TYPE){
2815 for (i=0; i<h->ref_count[1]; i++) {
2816 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2823 static void print_short_term(H264Context *h);
2824 static void print_long_term(H264Context *h);
2827 * Extract structure information about the picture described by pic_num in
2828 * the current decoding context (frame or field). Note that pic_num is
2829 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2830 * @param pic_num picture number for which to extract structure information
2831 * @param structure one of PICT_XXX describing structure of picture
2833 * @return frame number (short term) or long term index of picture
2834 * described by pic_num
// Splits an unwrapped pic_num into its picture number (returned; the
// actual shift/return line is elided here) and its field structure:
// same parity as the current picture by default, opposite field when
// the (elided) low-bit test indicates it.
2836 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2837 MpegEncContext * const s = &h->s;
2839 *structure = s->picture_structure;
2842 /* opposite field */
2843 *structure ^= PICT_FRAME;
// Parses ref_pic_list_modification from the slice header and applies it:
// starts from the default lists, then for each reordering opcode
// (idc 0/1: short-term via abs_diff_pic_num; idc 2: long-term index)
// finds the referenced picture and moves it to the current index,
// shifting the rest down. Missing references are zeroed/substituted.
// Returns 0 on success, -1 on a bitstream error (returns elided).
2850 static int decode_ref_pic_list_reordering(H264Context *h){
2851 MpegEncContext * const s = &h->s;
2852 int list, index, pic_structure;
2854 print_short_term(h);
2857 for(list=0; list<h->list_count; list++){
// start from the default list, then apply explicit reordering if present
2858 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2860 if(get_bits1(&s->gb)){
2861 int pred= h->curr_pic_num;
2863 for(index=0; ; index++){
2864 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2865 unsigned int pic_id;
2867 Picture *ref = NULL;
// idc 3 terminates the reordering loop for this list
2869 if(reordering_of_pic_nums_idc==3)
2872 if(index >= h->ref_count[list]){
2873 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2877 if(reordering_of_pic_nums_idc<3){
2878 if(reordering_of_pic_nums_idc<2){
// short-term: pred walks pic numbers modulo max_pic_num
2879 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2882 if(abs_diff_pic_num > h->max_pic_num){
2883 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2887 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2888 else pred+= abs_diff_pic_num;
2889 pred &= h->max_pic_num - 1;
2891 frame_num = pic_num_extract(h, pred, &pic_structure);
// search short-term refs newest-first for a parity-compatible match
2893 for(i= h->short_ref_count-1; i>=0; i--){
2894 ref = h->short_ref[i];
2895 assert(ref->reference);
2896 assert(!ref->long_ref);
2898 ref->frame_num == frame_num &&
2899 (ref->reference & pic_structure)
// idc 2: long-term reference selected by long_term_pic_idx
2907 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2909 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2912 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2915 ref = h->long_ref[long_idx];
2916 assert(!(ref && !ref->reference));
2917 if(ref && (ref->reference & pic_structure)){
2918 ref->pic_id= pic_id;
2919 assert(ref->long_ref);
2927 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2928 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// insert ref at 'index': find its old slot (or list end), shift up, place
2930 for(i=index; i+1<h->ref_count[list]; i++){
2931 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2934 for(; i > index; i--){
2935 h->ref_list[list][i]= h->ref_list[list][i-1];
2937 h->ref_list[list][index]= *ref;
2939 pic_as_field(&h->ref_list[list][index], pic_structure);
2943 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// final sanity pass: replace any empty slots with the current picture
2949 for(list=0; list<h->list_count; list++){
2950 for(index= 0; index < h->ref_count[list]; index++){
2951 if(!h->ref_list[list][index].data[0]){
2952 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2953 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
// For MBAFF: derives per-field reference entries (slots 16+2*i and
// 16+2*i+1) from each frame reference, halving them into top/bottom
// field views, and duplicates the explicit and implicit weights/offsets
// so field MBs can index them directly.
2961 static void fill_mbaff_ref_list(H264Context *h){
2963 for(list=0; list<2; list++){ //FIXME try list_count
2964 for(i=0; i<h->ref_count[list]; i++){
2965 Picture *frame = &h->ref_list[list][i];
2966 Picture *field = &h->ref_list[list][16+2*i];
// field[0] = top field view of the frame (doubled stride)
2969 field[0].linesize[j] <<= 1;
2970 field[0].reference = PICT_TOP_FIELD;
2971 field[0].poc= field[0].field_poc[0];
// field[1] = bottom field: same as top but shifted one frame line down
2972 field[1] = field[0];
2974 field[1].data[j] += frame->linesize[j];
2975 field[1].reference = PICT_BOTTOM_FIELD;
2976 field[1].poc= field[1].field_poc[1];
// both field entries reuse the frame's weighted-prediction parameters
2978 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2979 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2981 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2982 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2986 for(j=0; j<h->ref_count[1]; j++){
2987 for(i=0; i<h->ref_count[0]; i++)
2988 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2989 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2990 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
// Parses the slice header's explicit weighted-prediction table
// (pred_weight_table): log2 denominators, then per-list, per-reference
// luma and chroma weights/offsets; absent entries get the default
// (1<<denominator, offset 0). Sets use_weight/use_weight_chroma when
// any entry deviates from the defaults.
// NOTE(review): several lines (chroma j-loop headers, else branches,
// return) are elided from this excerpt.
2994 static int pred_weight_table(H264Context *h){
2995 MpegEncContext * const s = &h->s;
2997 int luma_def, chroma_def;
3000 h->use_weight_chroma= 0;
3001 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3002 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weight == unity at the chosen denominator
3003 luma_def = 1<<h->luma_log2_weight_denom;
3004 chroma_def = 1<<h->chroma_log2_weight_denom;
3006 for(list=0; list<2; list++){
3007 h->luma_weight_flag[list] = 0;
3008 h->chroma_weight_flag[list] = 0;
3009 for(i=0; i<h->ref_count[list]; i++){
3010 int luma_weight_flag, chroma_weight_flag;
3012 luma_weight_flag= get_bits1(&s->gb);
3013 if(luma_weight_flag){
3014 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3015 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// only mark weighting active if it differs from the implicit default
3016 if( h->luma_weight[list][i] != luma_def
3017 || h->luma_offset[list][i] != 0) {
3019 h->luma_weight_flag[list]= 1;
3022 h->luma_weight[list][i]= luma_def;
3023 h->luma_offset[list][i]= 0;
3027 chroma_weight_flag= get_bits1(&s->gb);
3028 if(chroma_weight_flag){
3031 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3032 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3033 if( h->chroma_weight[list][i][j] != chroma_def
3034 || h->chroma_offset[list][i][j] != 0) {
3035 h->use_weight_chroma= 1;
3036 h->chroma_weight_flag[list]= 1;
3042 h->chroma_weight[list][i][j]= chroma_def;
3043 h->chroma_offset[list][i][j]= 0;
// P slices have only list 0
3048 if(h->slice_type_nos != FF_B_TYPE) break;
3050 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Compute the implicit bi-prediction weight table from POC distances
 * (weighted_bipred_idc == 2). For each (ref0, ref1) pair the weight is
 * derived from the temporal distance scale factor, clamped to 32 for
 * degenerate/out-of-range cases.
 */
3054 static void implicit_weight_table(H264Context *h){
3055 MpegEncContext * const s = &h->s;
3057 int cur_poc = s->current_picture_ptr->poc;
3059 for (i = 0; i < 2; i++) {
3060 h->luma_weight_flag[i] = 0;
3061 h->chroma_weight_flag[i] = 0;
/* fast path: a single symmetric reference pair needs no weighting */
3064 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3065 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3067 h->use_weight_chroma= 0;
3072 h->use_weight_chroma= 2;
3073 h->luma_log2_weight_denom= 5;
3074 h->chroma_log2_weight_denom= 5;
3076 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3077 int poc0 = h->ref_list[0][ref0].poc;
3078 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3079 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor per the implicit weighting derivation */
3080 int td = av_clip(poc1 - poc0, -128, 127);
3082 int tb = av_clip(cur_poc - poc0, -128, 127);
3083 int tx = (16384 + (FFABS(td) >> 1)) / td;
3084 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factor falls back to the neutral weight 32 */
3085 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3086 h->implicit_weight[ref0][ref1] = 32;
3088 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3090 h->implicit_weight[ref0][ref1] = 32;
3096 * Mark a picture as no longer needed for reference. The refmask
3097 * argument allows unreferencing of individual fields or the whole frame.
3098 * If the picture becomes entirely unreferenced, but is being held for
3099 * display purposes, it is marked as such.
3100 * @param refmask mask of fields to unreference; the mask is bitwise
3101 * anded with the reference marking of pic
3102 * @return non-zero if pic becomes entirely unreferenced (except possibly
3103 * for display purposes) zero if one of the fields remains in
3106 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3108 if (pic->reference &= refmask) {
/* still pending output: keep it alive with a display-only marking */
3111 for(i = 0; h->delayed_pic[i]; i++)
3112 if(pic == h->delayed_pic[i]){
3113 pic->reference=DELAYED_PIC_REF;
3121 * instantaneous decoder refresh.
/* Drop all long-term and short-term references and reset the frame_num
 * bookkeeping, as required at an IDR picture. */
3123 static void idr(H264Context *h){
3126 for(i=0; i<16; i++){
3127 remove_long(h, i, 0);
3129 assert(h->long_ref_count==0);
3131 for(i=0; i<h->short_ref_count; i++){
/* refmask 0 fully unreferences the picture (display hold aside) */
3132 unreference_pic(h, h->short_ref[i], 0);
3133 h->short_ref[i]= NULL;
3135 h->short_ref_count=0;
3136 h->prev_frame_num= 0;
3137 h->prev_frame_num_offset= 0;
3142 /* forget old pics after a seek */
/* AVCodecContext.flush callback: drop delayed-output pictures, reset the
 * output POC tracking and the current picture, then flush mpegvideo state. */
3143 static void flush_dpb(AVCodecContext *avctx){
3144 H264Context *h= avctx->priv_data;
3146 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3147 if(h->delayed_pic[i])
3148 h->delayed_pic[i]->reference= 0;
3149 h->delayed_pic[i]= NULL;
3151 h->outputed_poc= INT_MIN;
3153 if(h->s.current_picture_ptr)
3154 h->s.current_picture_ptr->reference= 0;
3155 h->s.first_field= 0;
3157 ff_mpeg_flush(avctx);
3161 * Find a Picture in the short term reference list by frame number.
3162 * @param frame_num frame number to search for
3163 * @param idx the index into h->short_ref where returned picture is found
3164 * undefined if no picture found.
3165 * @return pointer to the found picture, or NULL if no pic with the provided
3166 * frame number is found
3168 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3169 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is small (bounded by the DPB size) */
3172 for(i=0; i<h->short_ref_count; i++){
3173 Picture *pic= h->short_ref[i];
3174 if(s->avctx->debug&FF_DEBUG_MMCO)
3175 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3176 if(pic->frame_num == frame_num) {
3185 * Remove a picture from the short term reference list by its index in
3186 * that list. This does no checking on the provided index; it is assumed
3187 * to be valid. Other list entries are shifted down.
3188 * @param i index into h->short_ref of picture to remove.
3190 static void remove_short_at_index(H264Context *h, int i){
3191 assert(i >= 0 && i < h->short_ref_count);
3192 h->short_ref[i]= NULL;
/* close the gap left by the removed entry (count already decremented) */
3193 if (--h->short_ref_count)
3194 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3199 * @return the removed picture or NULL if an error occurs
/* Remove a picture from the short-term list by frame number; ref_mask
 * selects which field(s) to unreference (see unreference_pic()). */
3201 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3202 MpegEncContext * const s = &h->s;
3206 if(s->avctx->debug&FF_DEBUG_MMCO)
3207 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3209 pic = find_short(h, frame_num, &i);
/* only drop the list entry when the picture is entirely unreferenced */
3211 if(unreference_pic(h, pic, ref_mask))
3212 remove_short_at_index(h, i);
3219 * Remove a picture from the long term reference list by its index in
3221 * @return the removed picture or NULL if an error occurs
3223 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3226 pic= h->long_ref[i];
/* only clear the slot when no field of the picture stays referenced */
3228 if(unreference_pic(h, pic, ref_mask)){
3229 assert(h->long_ref[i]->long_ref == 1);
3230 h->long_ref[i]->long_ref= 0;
3231 h->long_ref[i]= NULL;
3232 h->long_ref_count--;
3240 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3242 static void print_short_term(H264Context *h) {
3244 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3245 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3246 for(i=0; i<h->short_ref_count; i++){
3247 Picture *pic= h->short_ref[i];
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3254 * print long term list
/* Debug helper: dumps all 16 long-term reference slots when
 * FF_DEBUG_MMCO is enabled; no effect otherwise. */
3256 static void print_long_term(H264Context *h) {
3258 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3260 for(i = 0; i < 16; i++){
3261 Picture *pic= h->long_ref[i];
3263 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3270 * Executes the reference picture marking (memory management control operations).
/* Applies the decoded MMCO list (or the sliding-window default when the
 * list is empty) to the short-/long-term reference lists, then handles
 * the second field of a complementary pair and guards against reference
 * list overrun on corrupted streams. See H.264 spec 8.2.5 / 7.4.3.3. */
3272 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3273 MpegEncContext * const s = &h->s;
3274 int i, av_uninit(j);
3275 int current_ref_assigned=0;
3276 Picture *av_uninit(pic);
3278 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3279 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3281 for(i=0; i<mmco_count; i++){
3282 int av_uninit(structure), av_uninit(frame_num);
3283 if(s->avctx->debug&FF_DEBUG_MMCO)
3284 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* resolve the short-term target picture for opcodes that need one */
3286 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3287 || mmco[i].opcode == MMCO_SHORT2LONG){
3288 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3289 pic = find_short(h, frame_num, &j);
3291 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3292 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3293 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3298 switch(mmco[i].opcode){
3299 case MMCO_SHORT2UNUSED:
3300 if(s->avctx->debug&FF_DEBUG_MMCO)
3301 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3302 remove_short(h, frame_num, structure ^ PICT_FRAME);
3304 case MMCO_SHORT2LONG:
/* evict any different picture occupying the target long-term slot */
3305 if (h->long_ref[mmco[i].long_arg] != pic)
3306 remove_long(h, mmco[i].long_arg, 0);
3308 remove_short_at_index(h, j);
3309 h->long_ref[ mmco[i].long_arg ]= pic;
3310 if (h->long_ref[ mmco[i].long_arg ]){
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 case MMCO_LONG2UNUSED:
3316 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3317 pic = h->long_ref[j];
3319 remove_long(h, j, structure ^ PICT_FRAME);
3320 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3321 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3324 // Comment below left from previous code as it is an interresting note.
3325 /* First field in pair is in short term list or
3326 * at a different long term index.
3327 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3328 * Report the problem and keep the pair where it is,
3329 * and mark this field valid.
/* MMCO_LONG: assign the current picture to the given long-term slot */
3332 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3333 remove_long(h, mmco[i].long_arg, 0);
3335 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3336 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3337 h->long_ref_count++;
3340 s->current_picture_ptr->reference |= s->picture_structure;
3341 current_ref_assigned=1;
3343 case MMCO_SET_MAX_LONG:
3344 assert(mmco[i].long_arg <= 16);
3345 // just remove the long term which index is greater than new max
3346 for(j = mmco[i].long_arg; j<16; j++){
3347 remove_long(h, j, 0);
/* MMCO_RESET: clear all references and restart POC/frame_num at zero */
3351 while(h->short_ref_count){
3352 remove_short(h, h->short_ref[0]->frame_num, 0);
3354 for(j = 0; j < 16; j++) {
3355 remove_long(h, j, 0);
3357 s->current_picture_ptr->poc=
3358 s->current_picture_ptr->field_poc[0]=
3359 s->current_picture_ptr->field_poc[1]=
3363 s->current_picture_ptr->frame_num= 0;
3369 if (!current_ref_assigned) {
3370 /* Second field of complementary field pair; the first field of
3371 * which is already referenced. If short referenced, it
3372 * should be first entry in short_ref. If not, it must exist
3373 * in long_ref; trying to put it on the short list here is an
3374 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3376 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3377 /* Just mark the second field valid */
3378 s->current_picture_ptr->reference = PICT_FRAME;
3379 } else if (s->current_picture_ptr->long_ref) {
3380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3381 "assignment for second field "
3382 "in complementary field pair "
3383 "(first field is long term)\n");
3385 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* insert the current picture at the head of the short-term list */
3390 if(h->short_ref_count)
3391 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3393 h->short_ref[0]= s->current_picture_ptr;
3394 h->short_ref_count++;
3395 s->current_picture_ptr->reference |= s->picture_structure;
3399 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3401 /* We have too many reference frames, probably due to corrupted
3402 * stream. Need to discard one frame. Prevents overrun of the
3403 * short_ref and long_ref buffers.
3405 av_log(h->s.avctx, AV_LOG_ERROR,
3406 "number of reference frames exceeds max (probably "
3407 "corrupt input), discarding one\n");
3409 if (h->long_ref_count && !h->short_ref_count) {
3410 for (i = 0; i < 16; ++i)
3415 remove_long(h, i, 0);
3417 pic = h->short_ref[h->short_ref_count - 1];
3418 remove_short(h, pic->frame_num, 0);
3422 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices this synthesizes the implicit marking; otherwise it
 * reads the adaptive MMCO list when signalled, or builds a sliding-window
 * MMCO_SHORT2UNUSED when the reference buffers are full.
 */
3427 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3428 MpegEncContext * const s = &h->s;
3432 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3433 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: keep the IDR picture as long-term ref 0 */
3435 h->mmco[0].opcode= MMCO_LONG;
3436 h->mmco[0].long_arg= 0;
3440 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3441 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3442 MMCOOpcode opcode= get_ue_golomb_31(gb);
3444 h->mmco[i].opcode= opcode;
3445 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num */
3446 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3447 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3448 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3452 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3453 unsigned int long_arg= get_ue_golomb_31(gb);
/* field pic_nums may reach 31; frame indices are limited to <16 */
3454 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3455 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3458 h->mmco[i].long_arg= long_arg;
3461 if(opcode > (unsigned)MMCO_LONG){
3462 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3465 if(opcode == MMCO_END)
3470 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* sliding window: drop the oldest short-term ref when the DPB is full */
3472 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3473 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3474 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3475 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3477 if (FIELD_PICTURE) {
/* in field mode both fields of the oldest frame must be unreferenced */
3478 h->mmco[0].short_pic_num *= 2;
3479 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3480 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for all
 * three POC types (0: lsb/msb wrap tracking, 1: frame_num-derived with
 * per-cycle offsets, 2: 2*frame_num), storing field_poc[0]/[1] and the
 * combined poc on the current picture. See H.264 spec 8.2.1.
 */
3490 static int init_poc(H264Context *h){
3491 MpegEncContext * const s = &h->s;
3492 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3494 Picture *cur = s->current_picture_ptr;
3496 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped around: advance the offset by one period */
3497 if(h->frame_num < h->prev_frame_num)
3498 h->frame_num_offset += max_frame_num;
3500 if(h->sps.poc_type==0){
3501 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wrap in either direction to adjust poc_msb */
3503 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3504 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3505 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3506 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3508 h->poc_msb = h->prev_poc_msb;
3509 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3511 field_poc[1] = h->poc_msb + h->poc_lsb;
3512 if(s->picture_structure == PICT_FRAME)
3513 field_poc[1] += h->delta_poc_bottom;
3514 }else if(h->sps.poc_type==1){
3515 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3518 if(h->sps.poc_cycle_length != 0)
3519 abs_frame_num = h->frame_num_offset + h->frame_num;
3523 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3526 expected_delta_per_poc_cycle = 0;
3527 for(i=0; i < h->sps.poc_cycle_length; i++)
3528 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3530 if(abs_frame_num > 0){
3531 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3532 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3534 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3535 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3536 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3540 if(h->nal_ref_idc == 0)
3541 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3543 field_poc[0] = expectedpoc + h->delta_poc[0];
3544 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3546 if(s->picture_structure == PICT_FRAME)
3547 field_poc[1] += h->delta_poc[1];
/* poc_type == 2: POC follows directly from decoding order */
3549 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of the field(s) actually present */
3558 if(s->picture_structure != PICT_BOTTOM_FIELD)
3559 s->current_picture_ptr->field_poc[0]= field_poc[0];
3560 if(s->picture_structure != PICT_TOP_FIELD)
3561 s->current_picture_ptr->field_poc[1]= field_poc[1];
3562 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3569 * initialize scan tables
/* Build the 4x4/8x8 zigzag and field scan tables, permuting the indices
 * when a non-reference (SIMD) IDCT with a transposed coefficient layout
 * is in use, and set the _q0 (qscale==0 / lossless) table pointers. */
3571 static void init_scan_tables(H264Context *h){
3572 MpegEncContext * const s = &h->s;
/* reference C IDCT: plain scan order can be used as-is */
3574 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3575 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3576 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3578 for(i=0; i<16; i++){
/* transpose the 4x4 scan for IDCTs with swapped row/column layout */
3579 #define T(x) (x>>2) | ((x<<2) & 0xF)
3580 h->zigzag_scan[i] = T(zigzag_scan[i]);
3581 h-> field_scan[i] = T( field_scan[i]);
3585 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3586 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3587 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3588 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3589 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3591 for(i=0; i<64; i++){
/* transpose the 8x8 scan likewise */
3592 #define T(x) (x>>3) | ((x&7)<<3)
3593 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3594 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3595 h->field_scan8x8[i] = T(field_scan8x8[i]);
3596 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* transform bypass uses the un-permuted scans for qscale 0 */
3600 if(h->sps.transform_bypass){ //FIXME same ugly
3601 h->zigzag_scan_q0 = zigzag_scan;
3602 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3603 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3604 h->field_scan_q0 = field_scan;
3605 h->field_scan8x8_q0 = field_scan8x8;
3606 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3608 h->zigzag_scan_q0 = h->zigzag_scan;
3609 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3610 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3611 h->field_scan_q0 = h->field_scan;
3612 h->field_scan8x8_q0 = h->field_scan8x8;
3613 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3618 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-frame decoding state (current picture, line
 * sizes, POC/frame_num history, reference lists and dequant tables)
 * from the master context into a slice-thread context. */
3620 static void clone_slice(H264Context *dst, H264Context *src)
3622 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3623 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3624 dst->s.current_picture = src->s.current_picture;
3625 dst->s.linesize = src->s.linesize;
3626 dst->s.uvlinesize = src->s.uvlinesize;
3627 dst->s.first_field = src->s.first_field;
3629 dst->prev_poc_msb = src->prev_poc_msb;
3630 dst->prev_poc_lsb = src->prev_poc_lsb;
3631 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3632 dst->prev_frame_num = src->prev_frame_num;
3633 dst->short_ref_count = src->short_ref_count;
3635 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3636 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3637 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3638 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3640 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3641 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3645 * decodes a slice header.
3646 * This will also call MPV_common_init() and frame_start() as needed.
3648 * @param h h264context
3649 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3651 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3653 static int decode_slice_header(H264Context *h, H264Context *h0){
3654 MpegEncContext * const s = &h->s;
3655 MpegEncContext * const s0 = &h0->s;
3656 unsigned int first_mb_in_slice;
3657 unsigned int pps_id;
3658 int num_ref_idx_active_override_flag;
3659 unsigned int slice_type, tmp, i, j;
3660 int default_ref_list_done = 0;
3661 int last_pic_structure;
/* non-reference slices may be dropped / decoded with cheaper qpel */
3663 s->dropable= h->nal_ref_idc == 0;
3665 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3666 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3667 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3669 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3670 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3673 first_mb_in_slice= get_ue_golomb(&s->gb);
3675 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3676 h0->current_slice = 0;
3677 if (!s0->first_field)
3678 s->current_picture_ptr= NULL;
/* --- slice_type ------------------------------------------------------ */
3681 slice_type= get_ue_golomb_31(&s->gb);
3683 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3688 h->slice_type_fixed=1;
3690 h->slice_type_fixed=0;
3692 slice_type= golomb_to_pict_type[ slice_type ];
3693 if (slice_type == FF_I_TYPE
3694 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3695 default_ref_list_done = 1;
3697 h->slice_type= slice_type;
3698 h->slice_type_nos= slice_type & 3;
3700 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3701 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3702 av_log(h->s.avctx, AV_LOG_ERROR,
3703 "B picture before any references, skipping\n");
/* --- PPS / SPS activation -------------------------------------------- */
3707 pps_id= get_ue_golomb(&s->gb);
3708 if(pps_id>=MAX_PPS_COUNT){
3709 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3712 if(!h0->pps_buffers[pps_id]) {
3713 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3716 h->pps= *h0->pps_buffers[pps_id];
3718 if(!h0->sps_buffers[h->pps.sps_id]) {
3719 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3722 h->sps = *h0->sps_buffers[h->pps.sps_id];
3724 if(h == h0 && h->dequant_coeff_pps != pps_id){
3725 h->dequant_coeff_pps = pps_id;
3726 init_dequant_tables(h);
/* --- derive picture dimensions from the SPS -------------------------- */
3729 s->mb_width= h->sps.mb_width;
3730 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3732 h->b_stride= s->mb_width*4;
3733 h->b8_stride= s->mb_width*2;
3735 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3736 if(h->sps.frame_mbs_only_flag)
3737 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3739 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3741 if (s->context_initialized
3742 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3744 return -1; // width / height changed during parallelized decoding
3746 flush_dpb(s->avctx);
3749 if (!s->context_initialized) {
3751 return -1; // we cant (re-)initialize context during parallel decoding
3752 if (MPV_common_init(s) < 0)
3756 init_scan_tables(h);
/* set up per-thread H264 contexts for slice-parallel decoding */
3759 for(i = 1; i < s->avctx->thread_count; i++) {
3761 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3762 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3763 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3766 init_scan_tables(c);
3770 for(i = 0; i < s->avctx->thread_count; i++)
3771 if(context_init(h->thread_context[i]) < 0)
3774 s->avctx->width = s->width;
3775 s->avctx->height = s->height;
3776 s->avctx->sample_aspect_ratio= h->sps.sar;
3777 if(!s->avctx->sample_aspect_ratio.den)
3778 s->avctx->sample_aspect_ratio.den = 1;
3780 if(h->sps.timing_info_present_flag){
3781 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
/* workaround: old x264 builds wrote a halved time_scale */
3782 if(h->x264_build > 0 && h->x264_build < 44)
3783 s->avctx->time_base.den *= 2;
3784 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3785 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* --- frame_num and picture structure --------------------------------- */
3789 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3792 h->mb_aff_frame = 0;
3793 last_pic_structure = s0->picture_structure;
3794 if(h->sps.frame_mbs_only_flag){
3795 s->picture_structure= PICT_FRAME;
3797 if(get_bits1(&s->gb)) { //field_pic_flag
3798 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3800 s->picture_structure= PICT_FRAME;
3801 h->mb_aff_frame = h->sps.mb_aff;
3804 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3806 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating dummy reference frames */
3807 while(h->frame_num != h->prev_frame_num &&
3808 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3809 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3810 if (frame_start(h) < 0)
3812 h->prev_frame_num++;
3813 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3814 s->current_picture_ptr->frame_num= h->prev_frame_num;
3815 execute_ref_pic_marking(h, NULL, 0);
3818 /* See if we have a decoded first field looking for a pair... */
3819 if (s0->first_field) {
3820 assert(s0->current_picture_ptr);
3821 assert(s0->current_picture_ptr->data[0]);
3822 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3824 /* figure out if we have a complementary field pair */
3825 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3827 * Previous field is unmatched. Don't display it, but let it
3828 * remain for reference if marked as such.
3830 s0->current_picture_ptr = NULL;
3831 s0->first_field = FIELD_PICTURE;
3834 if (h->nal_ref_idc &&
3835 s0->current_picture_ptr->reference &&
3836 s0->current_picture_ptr->frame_num != h->frame_num) {
3838 * This and previous field were reference, but had
3839 * different frame_nums. Consider this field first in
3840 * pair. Throw away previous field except for reference
3843 s0->first_field = 1;
3844 s0->current_picture_ptr = NULL;
3847 /* Second field in complementary pair */
3848 s0->first_field = 0;
3853 /* Frame or first field in a potentially complementary pair */
3854 assert(!s0->current_picture_ptr);
3855 s0->first_field = FIELD_PICTURE;
3858 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3859 s0->first_field = 0;
3866 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
/* --- slice position -------------------------------------------------- */
3868 assert(s->mb_num == s->mb_width * s->mb_height);
3869 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3870 first_mb_in_slice >= s->mb_num){
3871 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3874 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3875 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3876 if (s->picture_structure == PICT_BOTTOM_FIELD)
3877 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3878 assert(s->mb_y < s->mb_height);
/* pic num range doubles in field coding (fields counted separately) */
3880 if(s->picture_structure==PICT_FRAME){
3881 h->curr_pic_num= h->frame_num;
3882 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3884 h->curr_pic_num= 2*h->frame_num + 1;
3885 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3888 if(h->nal_unit_type == NAL_IDR_SLICE){
3889 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC-related syntax ---------------------------------------------- */
3892 if(h->sps.poc_type==0){
3893 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3895 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3896 h->delta_poc_bottom= get_se_golomb(&s->gb);
3900 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3901 h->delta_poc[0]= get_se_golomb(&s->gb);
3903 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3904 h->delta_poc[1]= get_se_golomb(&s->gb);
3909 if(h->pps.redundant_pic_cnt_present){
3910 h->redundant_pic_count= get_ue_golomb(&s->gb);
3913 //set defaults, might be overridden a few lines later
3914 h->ref_count[0]= h->pps.ref_count[0];
3915 h->ref_count[1]= h->pps.ref_count[1];
/* --- reference counts and list construction -------------------------- */
3917 if(h->slice_type_nos != FF_I_TYPE){
3918 if(h->slice_type_nos == FF_B_TYPE){
3919 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3921 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3923 if(num_ref_idx_active_override_flag){
3924 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3925 if(h->slice_type_nos==FF_B_TYPE)
3926 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3928 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3929 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3930 h->ref_count[0]= h->ref_count[1]= 1;
3934 if(h->slice_type_nos == FF_B_TYPE)
3941 if(!default_ref_list_done){
3942 fill_default_ref_list(h);
3945 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3948 if(h->slice_type_nos!=FF_I_TYPE){
3949 s->last_picture_ptr= &h->ref_list[0][0];
3950 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3952 if(h->slice_type_nos==FF_B_TYPE){
3953 s->next_picture_ptr= &h->ref_list[1][0];
3954 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* --- weighted prediction setup --------------------------------------- */
3957 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3958 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3959 pred_weight_table(h);
3960 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3961 implicit_weight_table(h);
3964 for (i = 0; i < 2; i++) {
3965 h->luma_weight_flag[i] = 0;
3966 h->chroma_weight_flag[i] = 0;
3971 decode_ref_pic_marking(h0, &s->gb);
3974 fill_mbaff_ref_list(h);
3976 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3977 direct_dist_scale_factor(h);
3978 direct_ref_list_init(h);
3980 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3981 tmp = get_ue_golomb_31(&s->gb);
3983 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3986 h->cabac_init_idc= tmp;
/* --- QP and deblocking parameters ------------------------------------ */
3989 h->last_qscale_diff = 0;
3990 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3992 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3996 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3997 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3998 //FIXME qscale / qp ... stuff
3999 if(h->slice_type == FF_SP_TYPE){
4000 get_bits1(&s->gb); /* sp_for_switch_flag */
4002 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4003 get_se_golomb(&s->gb); /* slice_qs_delta */
4006 h->deblocking_filter = 1;
4007 h->slice_alpha_c0_offset = 0;
4008 h->slice_beta_offset = 0;
4009 if( h->pps.deblocking_filter_parameters_present ) {
4010 tmp= get_ue_golomb_31(&s->gb);
4012 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4015 h->deblocking_filter= tmp;
/* bitstream idc 0/1 maps to internal enable flag 1/0 */
4016 if(h->deblocking_filter < 2)
4017 h->deblocking_filter^= 1; // 1<->0
4019 if( h->deblocking_filter ) {
4020 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4021 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4025 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4026 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4027 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4028 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4029 h->deblocking_filter= 0;
4031 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4032 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4033 /* Cheat slightly for speed:
4034 Do not bother to deblock across slices. */
4035 h->deblocking_filter = 2;
4037 h0->max_contexts = 1;
4038 if(!h0->single_decode_warning) {
4039 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4040 h0->single_decode_warning = 1;
4043 return 1; // deblocking switched inside frame
4048 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4049 slice_group_change_cycle= get_bits(&s->gb, ?);
4052 h0->last_slice_type = slice_type;
4053 h->slice_num = ++h0->current_slice;
4054 if(h->slice_num >= MAX_SLICES){
4055 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* ref2frm maps a reference index to a packed (frame_num, field) id */
4059 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4063 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4064 +(h->ref_list[j][i].reference&3);
4067 for(i=16; i<48; i++)
4068 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4069 +(h->ref_list[j][i].reference&3);
4072 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4073 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4075 s->avctx->refs= h->sps.ref_frame_count;
4077 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4078 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4080 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4082 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4083 pps_id, h->frame_num,
4084 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4085 h->ref_count[0], h->ref_count[1],
4087 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4089 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4090 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4100 static inline int get_level_prefix(GetBitContext *gb){
     /* Reads a CAVLC level_prefix code: the run of leading zero bits that
      * precedes the next set bit in the cached bitstream word. */
4104 OPEN_READER(re, gb);
4105 UPDATE_CACHE(re, gb);
4106 buf=GET_CACHE(re, gb);
     /* 32 - av_log2(buf) = position of the most significant set bit from
      * the top of the 32-bit cache, i.e. bits consumed including the
      * terminating 1 bit. */
4108 log= 32 - av_log2(buf);
4110 print_bin(buf>>(32-log), log);
4111 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
     /* Consume the whole prefix (zeros plus the stop bit). */
4114 LAST_SKIP_BITS(re, gb, log);
4115 CLOSE_READER(re, gb);
/* Returns nonzero if the 8x8 transform may be used for the current MB:
 * no sub-macroblock partition may be smaller than 8x8. All four
 * sub_mb_type entries are tested at once through a single 64-bit load
 * (16-bit lanes, per the 0x0001000100010001 multiplier). With
 * direct_8x8_inference_flag set, DIRECT sub-blocks do not forbid 8x8.
 * NOTE(review): the uint64_t cast over h->sub_mb_type relies on
 * type-punning that formally violates strict aliasing — confirm the
 * build flags tolerate this (FFmpeg historically compiles with
 * -fno-strict-aliasing). */
4120 static inline int get_dct8x8_allowed(H264Context *h){
4121 if(h->sps.direct_8x8_inference_flag)
4122 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4124 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4128 * decodes a residual block.
4129 * @param n block index
4130 * @param scantable scantable
4131 * @param max_coeff number of coefficients in the block
4132 * @return <0 if an error occurred
4134 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4135 MpegEncContext * const s = &h->s;
4136 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4138 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4140 //FIXME put trailing_onex into the context
     /* --- coeff_token: encodes (total_coeff, trailing_ones). Chroma DC has
      * its own VLC; the luma VLC table is selected by the predicted
      * non-zero count of the neighbouring blocks. --- */
4142 if(n == CHROMA_DC_BLOCK_INDEX){
4143 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4144 total_coeff= coeff_token>>2;
4146 if(n == LUMA_DC_BLOCK_INDEX){
4147 total_coeff= pred_non_zero_count(h, 0);
4148 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4149 total_coeff= coeff_token>>2;
4151 total_coeff= pred_non_zero_count(h, n);
4152 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4153 total_coeff= coeff_token>>2;
4154 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4158 //FIXME set last_non_zero?
     /* Reject corrupted streams that claim more coefficients than fit. */
4162 if(total_coeff > (unsigned)max_coeff) {
4163 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4167 trailing_ones= coeff_token&3;
4168 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4169 assert(total_coeff<=16);
     /* --- trailing ones: up to 3 coefficients of magnitude 1 whose signs
      * come straight from the bitstream (bit set => -1). Three bits are
      * peeked at once and only trailing_ones of them are consumed. --- */
4171 i = show_bits(gb, 3);
4172 skip_bits(gb, trailing_ones);
4173 level[0] = 1-((i&4)>>1);
4174 level[1] = 1-((i&2) );
4175 level[2] = 1-((i&1)<<1);
     /* --- remaining levels: prefix/suffix coded; the first one uses the
      * cavlc_level_tab fast path, falling back to get_level_prefix for
      * long codes (level_code >= 100 marks the escape). --- */
4177 if(trailing_ones<total_coeff) {
4179 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4180 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4181 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4183 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4184 if(level_code >= 100){
4185 prefix= level_code - 100;
4186 if(prefix == LEVEL_TAB_BITS)
4187 prefix += get_level_prefix(gb);
4189 //first coefficient has suffix_length equal to 0 or 1
4190 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4192 level_code= (prefix<<1) + get_bits1(gb); //part
4194 level_code= prefix; //part
4195 }else if(prefix==14){
4197 level_code= (prefix<<1) + get_bits1(gb); //part
4199 level_code= prefix + get_bits(gb, 4); //part
4201 level_code= 30 + get_bits(gb, prefix-3); //part
4203 level_code += (1<<(prefix-3))-4096;
     /* When fewer than 3 trailing ones were coded, level magnitudes start
      * at 2, hence the +2 offset before the zigzag sign mapping below. */
4206 if(trailing_ones < 3) level_code += 2;
4209 mask= -(level_code&1);
4210 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4212 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4215 if(level_code + 3U > 6U)
4217 level[trailing_ones]= level_code;
4220 //remaining coefficients have suffix_length > 0
4221 for(i=trailing_ones+1;i<total_coeff;i++) {
     /* suffix_length grows once the decoded magnitude exceeds the
      * threshold for the current length (suffix_limit table). */
4222 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4223 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4224 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4226 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4227 if(level_code >= 100){
4228 prefix= level_code - 100;
4229 if(prefix == LEVEL_TAB_BITS){
4230 prefix += get_level_prefix(gb);
4233 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4235 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4237 level_code += (1<<(prefix-3))-4096;
4239 mask= -(level_code&1);
4240 level_code= (((2+level_code)>>1) ^ mask) - mask;
4242 level[i]= level_code;
4244 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
     /* --- total_zeros: number of zero coefficients interleaved before the
      * last non-zero one; omitted when the block is completely full. --- */
4249 if(total_coeff == max_coeff)
4252 if(n == CHROMA_DC_BLOCK_INDEX)
4253 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4255 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
     /* --- write levels back in scan order, reading a run_before code
      * between coefficients while zeros remain to be distributed. Two
      * copies of the loop: raw levels (qmul == NULL) vs dequantized. --- */
4258 coeff_num = zeros_left + total_coeff - 1;
4259 j = scantable[coeff_num];
4261 block[j] = level[0];
4262 for(i=1;i<total_coeff;i++) {
4265 else if(zeros_left < 7){
4266 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4268 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4270 zeros_left -= run_before;
4271 coeff_num -= 1 + run_before;
4272 j= scantable[ coeff_num ];
     /* Dequantizing variant: (level * qmul[j] + 32) >> 6 applies the
      * per-position quant multiplier with rounding. */
4277 block[j] = (level[0] * qmul[j] + 32)>>6;
4278 for(i=1;i<total_coeff;i++) {
4281 else if(zeros_left < 7){
4282 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4284 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4286 zeros_left -= run_before;
4287 coeff_num -= 1 + run_before;
4288 j= scantable[ coeff_num ];
4290 block[j]= (level[i] * qmul[j] + 32)>>6;
     /* A negative zeros_left means the run codes overran: corrupt data. */
4295 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts the MBAFF field-decoding flag for a skipped MB pair from the
 * left neighbour if it belongs to this slice, else from the top
 * neighbour; the flag mirrors whether that neighbour is interlaced. */
4302 static void predict_field_decoding_flag(H264Context *h){
4303 MpegEncContext * const s = &h->s;
4304 const int mb_xy= h->mb_xy;
4305 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4306 ? s->current_picture.mb_type[mb_xy-1]
4307 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4308 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4310 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4314 * decodes a P_SKIP or B_SKIP macroblock
4316 static void decode_mb_skip(H264Context *h){
4317 MpegEncContext * const s = &h->s;
4318 const int mb_xy= h->mb_xy;
     /* A skipped MB carries no residual: clear its non-zero counts. */
4321 memset(h->non_zero_count[mb_xy], 0, 16);
4322 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4325 mb_type|= MB_TYPE_INTERLACED;
     /* B_SKIP: motion comes from direct prediction. */
4327 if( h->slice_type_nos == FF_B_TYPE )
4329 // just for fill_caches. pred_direct_motion will set the real mb_type
4330 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4332 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4333 pred_direct_motion(h, &mb_type);
4334 mb_type|= MB_TYPE_SKIP;
     /* P_SKIP: 16x16 block using ref 0 and the predicted skip MV. */
4339 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4341 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4342 pred_pskip_motion(h, &mx, &my);
4343 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4344 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
     /* Commit motion and per-MB bookkeeping to the picture structures. */
4347 write_back_motion(h, mb_type);
4348 s->current_picture.mb_type[mb_xy]= mb_type;
4349 s->current_picture.qscale_table[mb_xy]= s->qscale;
4350 h->slice_table[ mb_xy ]= h->slice_num;
4351 h->prev_mb_skipped= 1;
4355 * decodes a macroblock
4356 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4358 static int decode_mb_cavlc(H264Context *h){
4359 MpegEncContext * const s = &h->s;
4361 int partition_count;
4362 unsigned int mb_type, cbp;
4363 int dct8x8_allowed= h->pps.transform_8x8_mode;
4365 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4367 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4368 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
     /* --- mb_skip_run handling for P/B slices: a pending run makes this MB
      * a skip; in MBAFF the field flag for the pair must still be read or
      * predicted. --- */
4370 if(h->slice_type_nos != FF_I_TYPE){
4371 if(s->mb_skip_run==-1)
4372 s->mb_skip_run= get_ue_golomb(&s->gb);
4374 if (s->mb_skip_run--) {
4375 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4376 if(s->mb_skip_run==0)
4377 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4379 predict_field_decoding_flag(h);
4386 if( (s->mb_y&1) == 0 )
4387 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4390 h->prev_mb_skipped= 0;
     /* --- mb_type: per-slice-type lookup tables; B and P values past the
      * inter range fall through to the intra tables (decode_intra_mb). --- */
4392 mb_type= get_ue_golomb(&s->gb);
4393 if(h->slice_type_nos == FF_B_TYPE){
4395 partition_count= b_mb_type_info[mb_type].partition_count;
4396 mb_type= b_mb_type_info[mb_type].type;
4399 goto decode_intra_mb;
4401 }else if(h->slice_type_nos == FF_P_TYPE){
4403 partition_count= p_mb_type_info[mb_type].partition_count;
4404 mb_type= p_mb_type_info[mb_type].type;
4407 goto decode_intra_mb;
4410 assert(h->slice_type_nos == FF_I_TYPE);
4411 if(h->slice_type == FF_SI_TYPE && mb_type)
4415 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4419 cbp= i_mb_type_info[mb_type].cbp;
4420 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4421 mb_type= i_mb_type_info[mb_type].type;
4425 mb_type |= MB_TYPE_INTERLACED;
4427 h->slice_table[ mb_xy ]= h->slice_num;
     /* --- I_PCM: raw byte-aligned samples copied straight into h->mb. --- */
4429 if(IS_INTRA_PCM(mb_type)){
4432 // We assume these blocks are very rare so we do not optimize it.
4433 align_get_bits(&s->gb);
4435 // The pixels are stored in the same order as levels in h->mb array.
4436 for(x=0; x < (CHROMA ? 384 : 256); x++){
4437 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4440 // In deblocking, the quantizer is 0
4441 s->current_picture.qscale_table[mb_xy]= 0;
4442 // All coeffs are present
4443 memset(h->non_zero_count[mb_xy], 16, 16);
4445 s->current_picture.mb_type[mb_xy]= mb_type;
     /* Field MBs in MBAFF address twice as many field references; the
      * doubling is undone at the end of the function. */
4450 h->ref_count[0] <<= 1;
4451 h->ref_count[1] <<= 1;
4454 fill_caches(h, mb_type, 0);
     /* --- intra prediction modes --- */
4457 if(IS_INTRA(mb_type)){
4459 // init_top_left_availability(h);
4460 if(IS_INTRA4x4(mb_type)){
4463 if(dct8x8_allowed && get_bits1(&s->gb)){
4464 mb_type |= MB_TYPE_8x8DCT;
4468 // fill_intra4x4_pred_table(h);
     /* Each 4x4 (or 8x8) luma block: either use the predicted mode, or a
      * 3-bit remainder that skips over the predicted value. */
4469 for(i=0; i<16; i+=di){
4470 int mode= pred_intra_mode(h, i);
4472 if(!get_bits1(&s->gb)){
4473 const int rem_mode= get_bits(&s->gb, 3);
4474 mode = rem_mode + (rem_mode >= mode);
4478 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4480 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4482 write_back_intra_pred_mode(h);
4483 if( check_intra4x4_pred_mode(h) < 0)
4486 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4487 if(h->intra16x16_pred_mode < 0)
4491 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4494 h->chroma_pred_mode= pred_mode;
     /* --- 8x8 partitions: read the four sub_mb_types, then per-list ref
      * indices, then per-sub-partition motion vectors. --- */
4496 }else if(partition_count==4){
4497 int i, j, sub_partition_count[4], list, ref[2][4];
4499 if(h->slice_type_nos == FF_B_TYPE){
4501 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4502 if(h->sub_mb_type[i] >=13){
4503 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4506 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4507 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4509 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4510 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4511 pred_direct_motion(h, &mb_type);
4512 h->ref_cache[0][scan8[4]] =
4513 h->ref_cache[1][scan8[4]] =
4514 h->ref_cache[0][scan8[12]] =
4515 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4518 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4520 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4521 if(h->sub_mb_type[i] >=4){
4522 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4525 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4526 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
     /* Reference indices: 1-ref lists need no bits, 2-ref lists read a
      * single inverted bit, larger lists use ue(v). */
4530 for(list=0; list<h->list_count; list++){
4531 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4533 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4534 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4538 }else if(ref_count == 2){
4539 tmp= get_bits1(&s->gb)^1;
4541 tmp= get_ue_golomb_31(&s->gb);
4543 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4556 dct8x8_allowed = get_dct8x8_allowed(h);
4558 for(list=0; list<h->list_count; list++){
4560 if(IS_DIRECT(h->sub_mb_type[i])) {
4561 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4564 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4565 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4567 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4568 const int sub_mb_type= h->sub_mb_type[i];
4569 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4570 for(j=0; j<sub_partition_count[i]; j++){
4572 const int index= 4*i + block_width*j;
4573 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4574 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4575 mx += get_se_golomb(&s->gb);
4576 my += get_se_golomb(&s->gb);
4577 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
     /* Replicate the decoded MV into every 4x4 cell the sub-partition
      * covers, depending on its shape. */
4579 if(IS_SUB_8X8(sub_mb_type)){
4581 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4583 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4584 }else if(IS_SUB_8X4(sub_mb_type)){
4585 mv_cache[ 1 ][0]= mx;
4586 mv_cache[ 1 ][1]= my;
4587 }else if(IS_SUB_4X8(sub_mb_type)){
4588 mv_cache[ 8 ][0]= mx;
4589 mv_cache[ 8 ][1]= my;
4591 mv_cache[ 0 ][0]= mx;
4592 mv_cache[ 0 ][1]= my;
4595 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4601 }else if(IS_DIRECT(mb_type)){
4602 pred_direct_motion(h, &mb_type);
4603 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     /* --- 16x16 / 16x8 / 8x16 partitions: refs then MVs per list. --- */
4605 int list, mx, my, i;
4606 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4607 if(IS_16X16(mb_type)){
4608 for(list=0; list<h->list_count; list++){
4610 if(IS_DIR(mb_type, 0, list)){
4611 if(h->ref_count[list]==1){
4613 }else if(h->ref_count[list]==2){
4614 val= get_bits1(&s->gb)^1;
4616 val= get_ue_golomb_31(&s->gb);
4617 if(val >= h->ref_count[list]){
4618 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4623 val= LIST_NOT_USED&0xFF;
4624 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4626 for(list=0; list<h->list_count; list++){
4628 if(IS_DIR(mb_type, 0, list)){
4629 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4630 mx += get_se_golomb(&s->gb);
4631 my += get_se_golomb(&s->gb);
4632 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4634 val= pack16to32(mx,my);
4637 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4640 else if(IS_16X8(mb_type)){
4641 for(list=0; list<h->list_count; list++){
4644 if(IS_DIR(mb_type, i, list)){
4645 if(h->ref_count[list] == 1){
4647 }else if(h->ref_count[list] == 2){
4648 val= get_bits1(&s->gb)^1;
4650 val= get_ue_golomb_31(&s->gb);
4651 if(val >= h->ref_count[list]){
4652 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4657 val= LIST_NOT_USED&0xFF;
4658 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4661 for(list=0; list<h->list_count; list++){
4664 if(IS_DIR(mb_type, i, list)){
4665 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4666 mx += get_se_golomb(&s->gb);
4667 my += get_se_golomb(&s->gb);
4668 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4670 val= pack16to32(mx,my);
4673 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4677 assert(IS_8X16(mb_type));
4678 for(list=0; list<h->list_count; list++){
4681 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4682 if(h->ref_count[list]==1){
4684 }else if(h->ref_count[list]==2){
4685 val= get_bits1(&s->gb)^1;
4687 val= get_ue_golomb_31(&s->gb);
4688 if(val >= h->ref_count[list]){
4689 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4694 val= LIST_NOT_USED&0xFF;
4695 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4698 for(list=0; list<h->list_count; list++){
4701 if(IS_DIR(mb_type, i, list)){
4702 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4703 mx += get_se_golomb(&s->gb);
4704 my += get_se_golomb(&s->gb);
4705 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4707 val= pack16to32(mx,my);
4710 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4716 if(IS_INTER(mb_type))
4717 write_back_motion(h, mb_type);
     /* --- coded_block_pattern (not present for I16x16, where it came from
      * the mb_type table above). --- */
4719 if(!IS_INTRA16x16(mb_type)){
4720 cbp= get_ue_golomb(&s->gb);
4722 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4727 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4728 else cbp= golomb_to_inter_cbp [cbp];
4730 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4731 else cbp= golomb_to_inter_cbp_gray[cbp];
     /* transform_size_8x8_flag for inter MBs with luma residual. */
4736 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4737 if(get_bits1(&s->gb)){
4738 mb_type |= MB_TYPE_8x8DCT;
4739 h->cbp_table[mb_xy]= cbp;
4742 s->current_picture.mb_type[mb_xy]= mb_type;
     /* --- residuals: pick scan tables, apply mb_qp_delta, then decode
      * luma DC/AC and chroma DC/AC via decode_residual. --- */
4744 if(cbp || IS_INTRA16x16(mb_type)){
4745 int i8x8, i4x4, chroma_idx;
4747 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4748 const uint8_t *scan, *scan8x8, *dc_scan;
4750 // fill_non_zero_count_cache(h);
4752 if(IS_INTERLACED(mb_type)){
4753 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4754 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4755 dc_scan= luma_dc_field_scan;
4757 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4758 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4759 dc_scan= luma_dc_zigzag_scan;
4762 dquant= get_se_golomb(&s->gb);
4764 if( dquant > 25 || dquant < -26 ){
4765 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
     /* QP wraps modulo 52 per the spec's mb_qp_delta arithmetic. */
4769 s->qscale += dquant;
4770 if(((unsigned)s->qscale) > 51){
4771 if(s->qscale<0) s->qscale+= 52;
4772 else s->qscale-= 52;
4775 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4776 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4777 if(IS_INTRA16x16(mb_type)){
4778 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4779 return -1; //FIXME continue if partitioned and other return -1 too
4782 assert((cbp&15) == 0 || (cbp&15) == 15);
4785 for(i8x8=0; i8x8<4; i8x8++){
4786 for(i4x4=0; i4x4<4; i4x4++){
4787 const int index= i4x4 + 4*i8x8;
4788 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4794 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4797 for(i8x8=0; i8x8<4; i8x8++){
4798 if(cbp & (1<<i8x8)){
4799 if(IS_8x8DCT(mb_type)){
4800 DCTELEM *buf = &h->mb[64*i8x8];
4802 for(i4x4=0; i4x4<4; i4x4++){
4803 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4804 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4807 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4808 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4810 for(i4x4=0; i4x4<4; i4x4++){
4811 const int index= i4x4 + 4*i8x8;
4813 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4819 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4820 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4826 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4827 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4833 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4834 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4835 for(i4x4=0; i4x4<4; i4x4++){
4836 const int index= 16 + 4*chroma_idx + i4x4;
4837 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4843 uint8_t * const nnz= &h->non_zero_count_cache[0];
4844 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4845 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4848 uint8_t * const nnz= &h->non_zero_count_cache[0];
4849 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4850 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4851 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4853 s->current_picture.qscale_table[mb_xy]= s->qscale;
4854 write_back_non_zero_count(h);
     /* Undo the MBAFF field ref-count doubling applied above. */
4857 h->ref_count[0] >>= 1;
4858 h->ref_count[1] >>= 1;
/* Decodes mb_field_decoding_flag with CABAC: the context (0..2) counts
 * how many of the left/above MB pairs in this slice are interlaced. */
4864 static int decode_cabac_field_decoding_flag(H264Context *h) {
4865 MpegEncContext * const s = &h->s;
4866 const int mb_x = s->mb_x;
4867 const int mb_y = s->mb_y & ~1;
4868 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4869 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4871 unsigned int ctx = 0;
4873 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4876 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4880 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type with CABAC. Returns 0 for I_4x4, 25 for
 * I_PCM, otherwise 1..24 encoding the I_16x16 variant (cbp_luma,
 * cbp_chroma, pred mode). In intra slices the first-bin context depends
 * on whether the left/top neighbours are themselves non-I4x4. */
4883 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4884 uint8_t *state= &h->cabac_state[ctx_base];
4888 MpegEncContext * const s = &h->s;
4889 const int mba_xy = h->left_mb_xy[0];
4890 const int mbb_xy = h->top_mb_xy;
4892 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4894 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4896 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4897 return 0; /* I4x4 */
4900 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4901 return 0; /* I4x4 */
4904 if( get_cabac_terminate( &h->cabac ) )
4905 return 25; /* PCM */
     /* Build the I_16x16 index: +12 if luma cbp != 0, +4/+8 for chroma
      * cbp, +1/+2 for the intra16x16 prediction mode bits. */
4907 mb_type = 1; /* I16x16 */
4908 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4909 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4910 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4911 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4912 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes mb_type for a B slice with CABAC. Returns the B mb_type index
 * (0 = B_Direct_16x16, 22 = B_8x8, 23+ = intra via
 * decode_cabac_intra_mb_type). The first-bin context counts non-DIRECT
 * left/top neighbours of this slice. */
4916 static int decode_cabac_mb_type_b( H264Context *h ) {
4917 MpegEncContext * const s = &h->s;
4919 const int mba_xy = h->left_mb_xy[0];
4920 const int mbb_xy = h->top_mb_xy;
4923 assert(h->slice_type_nos == FF_B_TYPE);
4925 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4927 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4930 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4931 return 0; /* B_Direct_16x16 */
4933 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4934 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
     /* Remaining types: a 4-bin fixed pattern, with special escapes for
      * intra (13), B_L1_L0_8x16 (14) and B_8x8 (15). */
4937 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4938 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4939 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4940 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4942 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4943 else if( bits == 13 ) {
4944 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4945 } else if( bits == 14 )
4946 return 11; /* B_L1_L0_8x16 */
4947 else if( bits == 15 )
4948 return 22; /* B_8x8 */
4950 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4951 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decodes mb_skip_flag with CABAC. The context (base 11 for P, 24 for B
 * via the slice-type offset below) counts non-skipped left/top
 * neighbours; the MBAFF branch adjusts neighbour addresses so the
 * compared MBs have matching field/frame coding. */
4954 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4955 MpegEncContext * const s = &h->s;
4959 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4960 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4963 && h->slice_table[mba_xy] == h->slice_num
4964 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4965 mba_xy += s->mb_stride;
4967 mbb_xy = mb_xy - s->mb_stride;
4969 && h->slice_table[mbb_xy] == h->slice_num
4970 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4971 mbb_xy -= s->mb_stride;
4973 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4975 int mb_xy = h->mb_xy;
4977 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4980 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4982 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4985 if( h->slice_type_nos == FF_B_TYPE )
4987 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes an intra 4x4 prediction mode with CABAC: one bin selects the
 * predicted mode, otherwise three bins give rem_intra4x4_pred_mode,
 * skipping over the predicted value (the >= adjustment). */
4990 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4993 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4996 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4997 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4998 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5000 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode with CABAC: truncated-unary code of up
 * to 3 bins; the first-bin context counts neighbours with a nonzero
 * chroma prediction mode. */
5006 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5007 const int mba_xy = h->left_mb_xy[0];
5008 const int mbb_xy = h->top_mb_xy;
5012 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5013 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5016 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5019 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5022 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5024 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decodes the 4-bit luma coded_block_pattern with CABAC. Each 8x8
 * block's context is built from the corresponding bits of the left and
 * top neighbours' cbp (-1 meaning "outside slice", which makes the !
 * tests read as coded). */
5030 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5031 int cbp_b, cbp_a, ctx, cbp = 0;
5033 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5034 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5036 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5037 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5038 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5039 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5040 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5041 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5042 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5043 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decodes the chroma part of the coded_block_pattern with CABAC:
 * returns 0 (no chroma), 1 (DC only) or 2 (DC+AC); contexts derive from
 * the neighbours' chroma cbp bits (top nibble of left/top cbp). */
5046 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5050 cbp_a = (h->left_cbp>>4)&0x03;
5051 cbp_b = (h-> top_cbp>>4)&0x03;
5054 if( cbp_a > 0 ) ctx++;
5055 if( cbp_b > 0 ) ctx += 2;
5056 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5060 if( cbp_a == 2 ) ctx++;
5061 if( cbp_b == 2 ) ctx += 2;
5062 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta with CABAC: unary code whose bin count is mapped
 * back to a signed delta (odd counts positive, even negative); capped
 * at 102 bins to guard against a corrupt stream looping forever. */
5064 static int decode_cabac_mb_dqp( H264Context *h) {
5065 int ctx= h->last_qscale_diff != 0;
5068 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5071 if(val > 102) //prevent infinite loop
5076 return (val + 1)>>1 ;
5078 return -((val + 1)>>1);
/* Decodes a P-slice sub_mb_type (0..3) with a small CABAC binary tree
 * over contexts 21..23. */
5080 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5081 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5083 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5085 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes a B-slice sub_mb_type (0..12) with a CABAC binary tree over
 * contexts 36..39; 0 is B_Direct_8x8. */
5089 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5091 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5092 return 0; /* B_Direct_8x8 */
5093 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5094 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5096 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5097 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5098 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5101 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5102 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag; context = number of 8x8-transform
 * neighbours (precomputed in h->neighbor_transform_size). */
5106 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5107 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx with CABAC: unary code over contexts 54+ctx, where
 * ctx starts from the left/above cached ref indices (B slices ignore
 * neighbours coded as direct). Bounded at 32 to stop runaway unary
 * codes from corrupt input. */
5110 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5111 int refa = h->ref_cache[list][scan8[n] - 1];
5112 int refb = h->ref_cache[list][scan8[n] - 8];
5116 if( h->slice_type_nos == FF_B_TYPE) {
5117 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5119 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5128 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5131 if(ref >= 32 /*h->ref_list[list]*/){
/* Decodes one motion vector difference component (l=0: x, ctx base 40;
 * l=1: y, ctx base 47) with CABAC: context from the neighbours' |mvd|
 * sum, unary prefix up to 9, then Exp-Golomb bypass suffix and a bypass
 * sign bit. Overflow of the bypass prefix is reported and clamped. */
5138 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5139 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5140 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5141 int ctxbase = (l == 0) ? 40 : 47;
5143 int ctx = (amvd>2) + (amvd>32);
5145 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5150 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5158 while( get_cabac_bypass( &h->cabac ) ) {
5162 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5167 if( get_cabac_bypass( &h->cabac ) )
5171 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Builds the coded_block_flag context: ctx = nza_nonzero + 2*nzb_nonzero
 * offset by 4*cat. DC flags come from the neighbours' cbp bits, AC
 * flags from the non_zero_count cache. */
5174 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5180 nza = h->left_cbp&0x100;
5181 nzb = h-> top_cbp&0x100;
5183 nza = (h->left_cbp>>(6+idx))&0x01;
5184 nzb = (h-> top_cbp>>(6+idx))&0x01;
5187 assert(cat == 1 || cat == 2 || cat == 4);
5188 nza = h->non_zero_count_cache[scan8[idx] - 1];
5189 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5198 return ctx + 4 * cat;
/* Maps each of the 63 possible 8x8 scan positions to the context offset
 * used for the CABAC last_significant_coeff_flag in 8x8 blocks. */
5201 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5202 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5203 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5204 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5205 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5208 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5209 static const int significant_coeff_flag_offset[2][6] = {
5210 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5211 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5213 static const int last_coeff_flag_offset[2][6] = {
5214 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5215 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5217 static const int coeff_abs_level_m1_offset[6] = {
5218 227+0, 227+10, 227+20, 227+30, 227+39, 426
5220 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5221 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5222 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5223 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5224 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5225 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5226 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5227 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5228 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5230 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5231 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5232 * map node ctx => cabac ctx for level=1 */
5233 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5234 /* map node ctx => cabac ctx for level>1 */
5235 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5236 static const uint8_t coeff_abs_level_transition[2][8] = {
5237 /* update node ctx after decoding a level=1 */
5238 { 1, 2, 3, 3, 4, 5, 6, 7 },
5239 /* update node ctx after decoding a level>1 */
5240 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Body fragment of decode_cabac_residual_internal(): CABAC-decodes one
 * residual block — coded_block_flag, significance map, then levels and
 * signs (in reverse scan order).
 * NOTE(review): the function signature and several interior lines
 * (braces, #else branches, local declarations such as `cc`, `last`,
 * `index[]`) are missing from this numbered listing — code is left
 * byte-identical; verify against the full file. */
5246     int coeff_count = 0;
5249     uint8_t *significant_coeff_ctx_base;
5250     uint8_t *last_coeff_ctx_base;
5251     uint8_t *abs_level_m1_ctx_base;
5254 #define CABAC_ON_STACK
5256 #ifdef CABAC_ON_STACK
/* copy the CABAC state to a stack-local context so the hot loops work
 * on locals the compiler can keep in registers */
5259     cc.range     = h->cabac.range;
5260     cc.low       = h->cabac.low;
5261     cc.bytestream= h->cabac.bytestream;
5263 #define CC &h->cabac
5267     /* cat: 0-> DC 16x16  n = 0
5268      *      1-> AC 16x16  n = luma4x4idx
5269      *      2-> Luma4x4   n = luma4x4idx
5270      *      3-> DC Chroma n = iCbCr
5271      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5272      *      5-> Luma8x8   n = 4 * luma8x8idx
5275     /* read coded block flag */
5276     if( is_dc || cat != 5 ) {
5277         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* coded_block_flag == 0: no coefficients, restore state and bail out */
5279                 h->non_zero_count_cache[scan8[n]] = 0;
5281 #ifdef CABAC_ON_STACK
5282             h->cabac.range     = cc.range     ;
5283             h->cabac.low       = cc.low       ;
5284             h->cabac.bytestream= cc.bytestream;
/* select the context sets for this category and frame/field mode */
5290     significant_coeff_ctx_base = h->cabac_state
5291         + significant_coeff_flag_offset[MB_FIELD][cat];
5292     last_coeff_ctx_base = h->cabac_state
5293         + last_coeff_flag_offset[MB_FIELD][cat];
5294     abs_level_m1_ctx_base = h->cabac_state
5295         + coeff_abs_level_m1_offset[cat];
5297     if( !is_dc && cat == 5 ) {
/* significance-map loop: records scan positions of nonzero coeffs in
 * index[] and stops early when last_significant_coeff_flag fires */
5298 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5299         for(last= 0; last < coefs; last++) { \
5300             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5301             if( get_cabac( CC, sig_ctx )) { \
5302                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5303                 index[coeff_count++] = last; \
5304                 if( get_cabac( CC, last_ctx ) ) { \
5310         if( last == max_coeff -1 ) {\
5311             index[coeff_count++] = last;\
5313         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5314 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5315         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5317         coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5319         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5321         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5324     assert(coeff_count > 0);
/* record the coded-block info for the deblocking filter / cbp table */
5328             h->cbp_table[h->mb_xy] |= 0x100;
5330             h->cbp_table[h->mb_xy] |= 0x40 << n;
5333             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5335         assert( cat == 1 || cat == 2 || cat == 4 );
5336         h->non_zero_count_cache[scan8[n]] = coeff_count;
/* level loop: walk index[] backwards, decoding |level|-1 and sign;
 * node_ctx tracks the context state machine from the tables above */
5341         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5343         int j= scantable[index[--coeff_count]];
5345         if( get_cabac( CC, ctx ) == 0 ) {
5346             node_ctx = coeff_abs_level_transition[0][node_ctx];
5348                 block[j] = get_cabac_bypass_sign( CC, -1);
5350                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5354             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5355             node_ctx = coeff_abs_level_transition[1][node_ctx];
5357             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* levels >= 15 continue with an Exp-Golomb style bypass suffix */
5361             if( coeff_abs >= 15 ) {
5363                 while( get_cabac_bypass( CC ) ) {
5369                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5375                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5377                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5380     } while( coeff_count );
5381 #ifdef CABAC_ON_STACK
/* write the local CABAC state back into the shared context */
5382             h->cabac.range     = cc.range     ;
5383             h->cabac.low       = cc.low       ;
5384             h->cabac.bytestream= cc.bytestream;
/* DC-only wrapper: forces the is_dc path of the internal decoder so the
 * compiler can specialize it (closing brace not visible in this listing). */
5390 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5391     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC wrapper: forces the !is_dc path of the internal decoder so the
 * compiler can specialize it (closing brace not visible in this listing). */
5394 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5395     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatch wrapper: cat 0 (luma DC) and cat 3 (chroma DC) are DC blocks;
 * everything else takes the non-DC path.
 * NOTE(review): both a direct internal call and the dc/nondc dispatch
 * are visible here — presumably separated by an #if/#else that is
 * missing from this listing; verify against the full file. */
5399 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5401     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5403     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5404     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock,
 * adjusting for MBAFF frame/field pairs and field pictures.
 * NOTE(review): interior lines (the MBAFF condition guarding the pair
 * logic, closing braces) are missing from this listing. */
5408 static inline void compute_mb_neighbors(H264Context *h)
5410     MpegEncContext * const s = &h->s;
5411     const int mb_xy = h->mb_xy;
/* default (progressive) neighbours: directly above and to the left */
5412     h->top_mb_xy     = mb_xy - s->mb_stride;
5413     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are defined on macroblock *pairs* */
5415         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5416         const int top_pair_xy      = pair_xy     - s->mb_stride;
5417         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5418         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5419         const int curr_mb_field_flag = MB_FIELD;
5420         const int bottom = (s->mb_y & 1);
5422         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5423             h->top_mb_xy -= s->mb_stride;
5425         if (!left_mb_field_flag == curr_mb_field_flag) {
5426             h->left_mb_xy[0] = pair_xy - 1;
5428     } else if (FIELD_PICTURE) {
5429         h->top_mb_xy -= s->mb_stride;
5435  * decodes a macroblock
5436  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): heavily truncated numbered listing — many interior lines
 * (braces, else branches, declarations such as mb_xy/skip/pred_mode/ref)
 * are missing from view.  Code left byte-identical; comments below only
 * annotate the visible decode phases. */
5438 static int decode_mb_cabac(H264Context *h) {
5439     MpegEncContext * const s = &h->s;
5441     int mb_type, partition_count, cbp = 0;
5442     int dct8x8_allowed= h->pps.transform_8x8_mode;
5444     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5446     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- phase 1: mb_skip_flag (and MBAFF field decoding flag) --- */
5447     if( h->slice_type_nos != FF_I_TYPE ) {
5449         /* a skipped mb needs the aff flag from the following mb */
5450         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5451             predict_field_decoding_flag(h);
5452         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5453             skip = h->next_mb_skipped;
5455             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5456         /* read skip flags */
5458             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5459                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5460                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5461                 if(!h->next_mb_skipped)
5462                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: clear per-MB state and return early (lines missing) */
5467             h->cbp_table[mb_xy] = 0;
5468             h->chroma_pred_mode_table[mb_xy] = 0;
5469             h->last_qscale_diff = 0;
5476         if( (s->mb_y&1) == 0 )
5478                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5481     h->prev_mb_skipped = 0;
5483     compute_mb_neighbors(h);
/* --- phase 2: mb_type per slice type (B / P / I) --- */
5485     if( h->slice_type_nos == FF_B_TYPE ) {
5486         mb_type = decode_cabac_mb_type_b( h );
5488             partition_count= b_mb_type_info[mb_type].partition_count;
5489             mb_type=         b_mb_type_info[mb_type].type;
5492             goto decode_intra_mb;
5494     } else if( h->slice_type_nos == FF_P_TYPE ) {
5495         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5497             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5498                 /* P_L0_D16x16, P_8x8 */
5499                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5501                 /* P_L0_D8x16, P_L0_D16x8 */
5502                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5504             partition_count= p_mb_type_info[mb_type].partition_count;
5505             mb_type=         p_mb_type_info[mb_type].type;
5507             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5508             goto decode_intra_mb;
5511         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5512         if(h->slice_type == FF_SI_TYPE && mb_type)
5514         assert(h->slice_type_nos == FF_I_TYPE);
5516         partition_count = 0;
5517         cbp= i_mb_type_info[mb_type].cbp;
5518         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5519         mb_type= i_mb_type_info[mb_type].type;
5522         mb_type |= MB_TYPE_INTERLACED;
5524     h->slice_table[ mb_xy ]= h->slice_num;
/* --- phase 3: I_PCM — raw samples follow; resync the CABAC decoder --- */
5526     if(IS_INTRA_PCM(mb_type)) {
5529         // We assume these blocks are very rare so we do not optimize it.
5530         // FIXME The two following lines get the bitstream position in the cabac
5531         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5532         ptr= h->cabac.bytestream;
5533         if(h->cabac.low&0x1) ptr--;
5535             if(h->cabac.low&0x1FF) ptr--;
5538         // The pixels are stored in the same order as levels in h->mb array.
5539         memcpy(h->mb, ptr, 256); ptr+=256;
5541             memcpy(h->mb+128, ptr, 128); ptr+=128;
5544         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5546         // All blocks are present
5547         h->cbp_table[mb_xy] = 0x1ef;
5548         h->chroma_pred_mode_table[mb_xy] = 0;
5549         // In deblocking, the quantizer is 0
5550         s->current_picture.qscale_table[mb_xy]= 0;
5551         // All coeffs are present
5552         memset(h->non_zero_count[mb_xy], 16, 16);
5553         s->current_picture.mb_type[mb_xy]= mb_type;
5554         h->last_qscale_diff = 0;
/* MBAFF uses doubled ref counts while decoding a field MB pair */
5559         h->ref_count[0] <<= 1;
5560         h->ref_count[1] <<= 1;
5563     fill_caches(h, mb_type, 0);
/* --- phase 4: prediction modes / motion info --- */
5565     if( IS_INTRA( mb_type ) ) {
5567         if( IS_INTRA4x4( mb_type ) ) {
5568             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5569                 mb_type |= MB_TYPE_8x8DCT;
5570                 for( i = 0; i < 16; i+=4 ) {
5571                     int pred = pred_intra_mode( h, i );
5572                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5573                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5576                 for( i = 0; i < 16; i++ ) {
5577                     int pred = pred_intra_mode( h, i );
5578                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5580                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5583             write_back_intra_pred_mode(h);
5584             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5586             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5587             if( h->intra16x16_pred_mode < 0 ) return -1;
5590         h->chroma_pred_mode_table[mb_xy] =
5591         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5593         pred_mode= check_intra_pred_mode( h, pred_mode );
5594         if( pred_mode < 0 ) return -1;
5595         h->chroma_pred_mode= pred_mode;
5597     } else if( partition_count == 4 ) {
/* 8x8 sub-macroblock partitions: sub_mb_type, refs, then MVs */
5598         int i, j, sub_partition_count[4], list, ref[2][4];
5600         if( h->slice_type_nos == FF_B_TYPE ) {
5601             for( i = 0; i < 4; i++ ) {
5602                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5603                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5604                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5606             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5607                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5608                 pred_direct_motion(h, &mb_type);
5609                 h->ref_cache[0][scan8[4]] =
5610                 h->ref_cache[1][scan8[4]] =
5611                 h->ref_cache[0][scan8[12]] =
5612                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5613                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5614                     for( i = 0; i < 4; i++ )
5615                         if( IS_DIRECT(h->sub_mb_type[i]) )
5616                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5620             for( i = 0; i < 4; i++ ) {
5621                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5622                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5623                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5627         for( list = 0; list < h->list_count; list++ ) {
5628                 for( i = 0; i < 4; i++ ) {
5629                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5630                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5631                         if( h->ref_count[list] > 1 ){
5632                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5633                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5634                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5642                     h->ref_cache[list][ scan8[4*i]+1 ]=
5643                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5648             dct8x8_allowed = get_dct8x8_allowed(h);
5650         for(list=0; list<h->list_count; list++){
5652                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5653                 if(IS_DIRECT(h->sub_mb_type[i])){
5654                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5658                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5659                     const int sub_mb_type= h->sub_mb_type[i];
5660                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5661                     for(j=0; j<sub_partition_count[i]; j++){
5664                         const int index= 4*i + block_width*j;
5665                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5666                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5667                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5669                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5670                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5671                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into the cache according to sub-partition shape */
5673                         if(IS_SUB_8X8(sub_mb_type)){
5675                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5677                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5680                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5682                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5683                         }else if(IS_SUB_8X4(sub_mb_type)){
5684                             mv_cache[ 1 ][0]= mx;
5685                             mv_cache[ 1 ][1]= my;
5687                             mvd_cache[ 1 ][0]=  mx - mpx;
5688                             mvd_cache[ 1 ][1]= my - mpy;
5689                         }else if(IS_SUB_4X8(sub_mb_type)){
5690                             mv_cache[ 8 ][0]= mx;
5691                             mv_cache[ 8 ][1]= my;
5693                             mvd_cache[ 8 ][0]= mx - mpx;
5694                             mvd_cache[ 8 ][1]= my - mpy;
5696                         mv_cache[ 0 ][0]= mx;
5697                         mv_cache[ 0 ][1]= my;
5699                         mvd_cache[ 0 ][0]= mx - mpx;
5700                         mvd_cache[ 0 ][1]= my - mpy;
5703                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5704                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5705                     p[0] = p[1] = p[8] = p[9] = 0;
5706                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
5710     } else if( IS_DIRECT(mb_type) ) {
5711         pred_direct_motion(h, &mb_type);
5712         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5713         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5714         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5716         int list, mx, my, i, mpx, mpy;
5717         if(IS_16X16(mb_type)){
5718             for(list=0; list<h->list_count; list++){
5719                 if(IS_DIR(mb_type, 0, list)){
5721                     if(h->ref_count[list] > 1){
5722                         ref= decode_cabac_mb_ref(h, list, 0);
5723                         if(ref >= (unsigned)h->ref_count[list]){
5724                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5729                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5731                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5733             for(list=0; list<h->list_count; list++){
5734                 if(IS_DIR(mb_type, 0, list)){
5735                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5737                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5738                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5739                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5741                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5742                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5744                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5747         else if(IS_16X8(mb_type)){
5748             for(list=0; list<h->list_count; list++){
5750                         if(IS_DIR(mb_type, i, list)){
5752                             if(h->ref_count[list] > 1){
5753                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5754                                 if(ref >= (unsigned)h->ref_count[list]){
5755                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5760                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5762                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5765             for(list=0; list<h->list_count; list++){
5767                     if(IS_DIR(mb_type, i, list)){
5768                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5769                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5770                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5771                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5773                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5774                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5776                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5777                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5782             assert(IS_8X16(mb_type));
5783             for(list=0; list<h->list_count; list++){
5785                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5787                             if(h->ref_count[list] > 1){
5788                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5789                                 if(ref >= (unsigned)h->ref_count[list]){
5790                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5795                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5797                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5800             for(list=0; list<h->list_count; list++){
5802                     if(IS_DIR(mb_type, i, list)){
5803                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5804                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5805                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5807                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5808                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5809                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5811                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5812                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5819     if( IS_INTER( mb_type ) ) {
5820         h->chroma_pred_mode_table[mb_xy] = 0;
5821         write_back_motion( h, mb_type );
/* --- phase 5: coded_block_pattern and transform size --- */
5824     if( !IS_INTRA16x16( mb_type ) ) {
5825         cbp  = decode_cabac_mb_cbp_luma( h );
5827             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5830     h->cbp_table[mb_xy] = h->cbp = cbp;
5832     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5833         if( decode_cabac_mb_transform_size( h ) )
5834             mb_type |= MB_TYPE_8x8DCT;
5836     s->current_picture.mb_type[mb_xy]= mb_type;
/* --- phase 6: residuals (dqp, luma DC/AC, chroma DC/AC) --- */
5838     if( cbp || IS_INTRA16x16( mb_type ) ) {
5839         const uint8_t *scan, *scan8x8, *dc_scan;
5840         const uint32_t *qmul;
5843         if(IS_INTERLACED(mb_type)){
5844             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5845             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5846             dc_scan= luma_dc_field_scan;
5848             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5849             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5850             dc_scan= luma_dc_zigzag_scan;
5853         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5854         if( dqp == INT_MIN ){
5855             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into the 0..51 range after adding dqp */
5859         if(((unsigned)s->qscale) > 51){
5860             if(s->qscale<0) s->qscale+= 52;
5861             else            s->qscale-= 52;
5863         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5864         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5866         if( IS_INTRA16x16( mb_type ) ) {
5868             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5869             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5872                 qmul = h->dequant4_coeff[0][s->qscale];
5873                 for( i = 0; i < 16; i++ ) {
5874                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5875                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5878                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5882             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5883                 if( cbp & (1<<i8x8) ) {
5884                     if( IS_8x8DCT(mb_type) ) {
5885                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5886                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5888                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5889                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5890                             const int index = 4*i8x8 + i4x4;
5891                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5893                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5894 //STOP_TIMER("decode_residual")
5898                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5899                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5906             for( c = 0; c < 2; c++ ) {
5907                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5908                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5914             for( c = 0; c < 2; c++ ) {
5915                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5916                 for( i = 0; i < 4; i++ ) {
5917                     const int index = 16 + 4 * c + i;
5918                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5919                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5923             uint8_t * const nnz= &h->non_zero_count_cache[0];
5924             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5925             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5928         uint8_t * const nnz= &h->non_zero_count_cache[0];
5929         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5930         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5931         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5932         h->last_qscale_diff = 0;
5935     s->current_picture.qscale_table[mb_xy]= s->qscale;
5936     write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling from above */
5939         h->ref_count[0] >>= 1;
5940         h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge: bS<4 uses the tc0 clipping filter,
 * otherwise the strong intra filter.  Tables are biased by +52 so qp
 * offsets can index them directly.  NOTE(review): interior lines
 * (tc[] declaration, if/else around the two dsp calls, closing braces)
 * are missing from this listing. */
5947 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5948     const int index_a = qp + h->slice_alpha_c0_offset;
5949     const int alpha = (alpha_table+52)[index_a];
5950     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5954         tc[0] = (tc0_table+52)[index_a][bS[0]];
5955         tc[1] = (tc0_table+52)[index_a][bS[1]];
5956         tc[2] = (tc0_table+52)[index_a][bS[2]];
5957         tc[3] = (tc0_table+52)[index_a][bS[3]];
5958         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5960         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge; the chroma dsp filters take tc0+1
 * (convention of the chroma loop-filter DSP functions).
 * NOTE(review): interior lines are missing from this listing. */
5963 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5964     const int index_a = qp + h->slice_alpha_c0_offset;
5965     const int alpha = (alpha_table+52)[index_a];
5966     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5970         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5971         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5972         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5973         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5974         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5976         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar per-row vertical luma deblocking for the MBAFF case, where
 * alternate rows may belong to different fields and so need per-row
 * qp/bS selection (cannot use the block dsp filters).
 * NOTE(review): interior lines (several declarations, tc derivation,
 * continue statements, closing braces) are missing from this listing. */
5980 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5982     for( i = 0; i < 16; i++, pix += stride) {
5988             int bS_index = (i >> 1);
5991             bS_index |= (i & 1);
5994         if( bS[bS_index] == 0 ) {
/* select qp of the field this row belongs to */
5998         qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5999         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6000         alpha = (alpha_table+52)[index_a];
6001         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6003         if( bS[bS_index] < 4 ) {
/* normal (clipped) filter */
6004             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6005             const int p0 = pix[-1];
6006             const int p1 = pix[-2];
6007             const int p2 = pix[-3];
6008             const int q0 = pix[0];
6009             const int q1 = pix[1];
6010             const int q2 = pix[2];
6012             if( FFABS( p0 - q0 ) < alpha &&
6013                 FFABS( p1 - p0 ) < beta &&
6014                 FFABS( q1 - q0 ) < beta ) {
6018                 if( FFABS( p2 - p0 ) < beta ) {
6019                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6022                 if( FFABS( q2 - q0 ) < beta ) {
6023                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6027                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6028                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6029                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6030                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong intra filter */
6033             const int p0 = pix[-1];
6034             const int p1 = pix[-2];
6035             const int p2 = pix[-3];
6037             const int q0 = pix[0];
6038             const int q1 = pix[1];
6039             const int q2 = pix[2];
6041             if( FFABS( p0 - q0 ) < alpha &&
6042                 FFABS( p1 - p0 ) < beta &&
6043                 FFABS( q1 - q0 ) < beta ) {
6045                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6046                     if( FFABS( p2 - p0 ) < beta)
6048                         const int p3 = pix[-4];
6050                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6051                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6052                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6055                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6057                     if( FFABS( q2 - q0 ) < beta)
6059                         const int q3 = pix[3];
6061                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6062                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6063                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6066                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6070                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6071                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6073                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Scalar per-row vertical chroma deblocking for the MBAFF case (chroma
 * has 8 rows; only p0/p1 and q0/q1 are touched).
 * NOTE(review): interior lines (bS_index derivation, continue, closing
 * braces) are missing from this listing. */
6078 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6080     for( i = 0; i < 8; i++, pix += stride) {
6088         if( bS[bS_index] == 0 ) {
6092         qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6093         index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6094         alpha = (alpha_table+52)[index_a];
6095         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6097         if( bS[bS_index] < 4 ) {
/* chroma uses tc0+1, per the loop-filter convention */
6098             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6099             const int p0 = pix[-1];
6100             const int p1 = pix[-2];
6101             const int q0 = pix[0];
6102             const int q1 = pix[1];
6104             if( FFABS( p0 - q0 ) < alpha &&
6105                 FFABS( p1 - p0 ) < beta &&
6106                 FFABS( q1 - q0 ) < beta ) {
6107                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6109                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6110                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6111                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6114             const int p0 = pix[-1];
6115             const int p1 = pix[-2];
6116             const int q0 = pix[0];
6117             const int q1 = pix[1];
6119             if( FFABS( p0 - q0 ) < alpha &&
6120                 FFABS( p1 - p0 ) < beta &&
6121                 FFABS( q1 - q0 ) < beta ) {
6123                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6124                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6125                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (vertical filter in the DSP naming).
 * NOTE(review): interior lines (tc[] declaration, if/else, closing
 * braces) are missing from this listing. */
6131 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6132     const int index_a = qp + h->slice_alpha_c0_offset;
6133     const int alpha = (alpha_table+52)[index_a];
6134     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6138         tc[0] = (tc0_table+52)[index_a][bS[0]];
6139         tc[1] = (tc0_table+52)[index_a][bS[1]];
6140         tc[2] = (tc0_table+52)[index_a][bS[2]];
6141         tc[3] = (tc0_table+52)[index_a][bS[3]];
6142         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6144         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; tc0+1 as in the other chroma
 * filters.  NOTE(review): interior lines are missing from this listing. */
6148 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6149     const int index_a = qp + h->slice_alpha_c0_offset;
6150     const int alpha = (alpha_table+52)[index_a];
6151     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6155         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6156         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6157         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6158         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6159         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6161         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: falls back to the full
 * filter_mb() for hard cases (picture edges, per-MB chroma qp, MBAFF,
 * slice-boundary rules), otherwise computes bS per edge itself.
 * NOTE(review): this listing is missing interior lines (returns, else
 * branches, the FILTER() invocations after the macro, closing braces). */
6165 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6166     MpegEncContext * const s = &h->s;
6167     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6169     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle -> full filter */
6173     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6174        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6175        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6176                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6177         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6180     assert(!FRAME_MBAFF);
/* average qp with left/top neighbours, per the standard */
6182     mb_type = s->current_picture.mb_type[mb_xy];
6183     qp = s->current_picture.qscale_table[mb_xy];
6184     qp0 = s->current_picture.qscale_table[mb_xy-1];
6185     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6186     qpc = get_chroma_qp( h, 0, qp );
6187     qpc0 = get_chroma_qp( h, 0, qp0 );
6188     qpc1 = get_chroma_qp( h, 0, qp1 );
6189     qp0 = (qp + qp0 + 1) >> 1;
6190     qp1 = (qp + qp1 + 1) >> 1;
6191     qpc0 = (qpc + qpc0 + 1) >> 1;
6192     qpc1 = (qpc + qpc1 + 1) >> 1;
6193     qp_thresh = 15 - h->slice_alpha_c0_offset;
/* all qps below threshold: filter is a no-op, skip everything */
6194     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6195        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6198     if( IS_INTRA(mb_type) ) {
/* intra MBs use fixed bS (4 on MB edges, 3 inside) */
6199         int16_t bS4[4] = {4,4,4,4};
6200         int16_t bS3[4] = {3,3,3,3};
6201         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6202         if( IS_8x8DCT(mb_type) ) {
6203             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6204             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6205             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6206             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6208             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6209             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6210             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6211             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6212             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6213             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6214             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6215             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6217         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6218         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6219         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6220         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6221         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6222         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6223         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6224         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MBs: compute bS per edge via the dsp strength function */
6227         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6228         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6230         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6232             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6234             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6235                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6236             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6237                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6239             int step =  IS_8x8DCT(mb_type) ? 2 : 1;
6240             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6241             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6242                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6244         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6245             bSv[0][0] = 0x0004000400040004ULL;
6246         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6247             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6249 #define FILTER(hv,dir,edge)\
6250         if(bSv[dir][edge]) {\
6251             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6253                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6254                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6260         } else if( IS_8x8DCT(mb_type) ) {
/* Deblock one macroblock along one direction (dir==0: vertical edges,
 * dir==1: horizontal edges). For each 4-sample edge segment a boundary
 * strength bS is derived from intra coding, non-zero residual, and
 * reference/motion-vector differences, then the edge filters are run
 * on the luma plane and (for even edges) both chroma planes.
 * NOTE(review): several interior lines are missing from this excerpt;
 * comments describe only what the visible code shows. */
6280 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6281 MpegEncContext * const s = &h->s;
/* mbm_xy: the neighbouring macroblock across the first edge
 * (left neighbour for vertical edges, top neighbour for horizontal). */
6283 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6284 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers for the current and
 * neighbouring slice; the +20/+2 offset skips the padding entries. */
6285 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6286 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* 0xFFFF marks "no macroblock" (picture border): skip the first edge. */
6287 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB has no internal edges: only edge 0 is filtered. */
6289 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6290 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6291 // how often to recheck mv-based bS when iterating between edges
6292 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6293 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6294 // how often to recheck mv-based bS when iterating along each edge
6295 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6297 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries. */
6301 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6304 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6305 && !IS_INTERLACED(mb_type)
6306 && IS_INTERLACED(mbm_type)
6308 // This is a special case in the norm where the filtering must
6309 // be done twice (one each of the field) even if we are in a
6310 // frame macroblock.
/* nnz_idx maps the 4 horizontal positions to the neighbour's
 * bottom-row non_zero_count entries. */
6312 static const int nnz_idx[4] = {4,5,6,3};
6313 unsigned int tmp_linesize = 2 * linesize;
6314 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6315 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter once per field of the interlaced neighbour pair. */
6320 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6321 if( IS_INTRA(mb_type) ||
6322 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6323 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6325 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6326 for( i = 0; i < 4; i++ ) {
6327 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6328 mbn_nnz[nnz_idx[i]] != 0 )
6334 // Do not use s->qscale as luma quantizer because it has not the same
6335 // value in IPCM macroblocks.
6336 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6337 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6338 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6339 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
/* Chroma QP for the edge is the average of the two MBs' chroma QPs. */
6340 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6341 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6342 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6343 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main loop over the (up to 4) edges in this direction. */
6350 for( edge = start; edge < edges; edge++ ) {
6351 /* mbn_xy: neighbor macroblock */
6352 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6353 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6354 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform MBs have no internal edges at odd positions. */
6358 if( (edge&1) && IS_8x8DCT(mb_type) )
6361 if( IS_INTRA(mb_type) ||
6362 IS_INTRA(mbn_type) ) {
6365 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6366 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6375 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Non-intra: bS depends on coefficients, refs and MV deltas. */
6380 if( edge & mask_edge ) {
6381 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6384 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6385 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6388 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
/* One MV/ref comparison covers the whole edge in this case. */
6389 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6390 int bn_idx= b_idx - (dir ? 8:1);
6393 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6394 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6395 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6396 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B-slices: also compare against the swapped reference list. */
6399 if(h->slice_type_nos == FF_B_TYPE && v){
6401 for( l = 0; !v && l < 2; l++ ) {
6403 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6404 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6405 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6409 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* General case: compute bS per 4-sample segment. */
6415 for( i = 0; i < 4; i++ ) {
6416 int x = dir == 0 ? edge : i;
6417 int y = dir == 0 ? i : edge;
6418 int b_idx= 8 + 4 + x + 8*y;
6419 int bn_idx= b_idx - (dir ? 8:1);
6421 if( h->non_zero_count_cache[b_idx] |
6422 h->non_zero_count_cache[bn_idx] ) {
6428 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6429 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6430 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6431 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6437 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6439 for( l = 0; l < 2; l++ ) {
6441 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6442 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6443 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All-zero bS: nothing to filter on this edge. */
6452 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6457 // Do not use s->qscale as luma quantizer because it has not the same
6458 // value in IPCM macroblocks.
6459 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6460 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6461 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6462 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* Vertical edges: filter columns; chroma only on even edges (4:2:0). */
6464 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6465 if( (edge&1) == 0 ) {
6466 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6467 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6468 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6469 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Horizontal edges: filter rows; chroma only on even edges. */
6472 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6473 if( (edge&1) == 0 ) {
6474 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6475 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6476 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6477 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast-path) deblocking of one macroblock: handles the
 * low-QP early exit, fixes up non_zero_count for CAVLC 8x8 transform,
 * performs the special MBAFF first-vertical-edge filtering, then
 * delegates to filter_mb_dir() for both directions.
 * NOTE(review): interior lines are missing from this excerpt; comments
 * only describe visible code. */
6483 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6484 MpegEncContext * const s = &h->s;
6485 const int mb_xy= mb_x + mb_y*s->mb_stride;
6486 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Interlaced MBs use a tighter vertical MV threshold (2 vs 4). */
6487 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6488 int first_vertical_edge_done = 0;
6491 //for sufficiently low qp, filtering wouldn't do anything
6492 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6494 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6495 int qp = s->current_picture.qscale_table[mb_xy];
6497 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6498 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6503 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6504 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6505 int top_type, left_type[2];
6506 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6507 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6508 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the NNZ cache edges from the neighbours' cbp bits. */
6510 if(IS_8x8DCT(top_type)){
6511 h->non_zero_count_cache[4+8*0]=
6512 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6513 h->non_zero_count_cache[6+8*0]=
6514 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6516 if(IS_8x8DCT(left_type[0])){
6517 h->non_zero_count_cache[3+8*1]=
6518 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6520 if(IS_8x8DCT(left_type[1])){
6521 h->non_zero_count_cache[3+8*3]=
6522 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* For the current 8x8-transform MB, replicate each 8x8 block's cbp
 * bit across its four 4x4 NNZ cache positions. */
6525 if(IS_8x8DCT(mb_type)){
6526 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6527 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6529 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6530 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6532 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6533 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6535 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6536 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6541 // left mb is in picture
6542 && h->slice_table[mb_xy-1] != 0xFFFF
6543 // and current and left pair do not have the same interlaced type
6544 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6545 // and left mb is in the same slice if deblocking_filter == 2
6546 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6547 /* First vertical edge is different in MBAFF frames
6548 * There are 8 different bS to compute and 2 different Qp
6550 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6551 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6556 int mb_qp, mbn0_qp, mbn1_qp;
6558 first_vertical_edge_done = 1;
6560 if( IS_INTRA(mb_type) )
6561 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6563 for( i = 0; i < 8; i++ ) {
6564 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6566 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6568 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
/* CAVLC 8x8dct neighbours: NNZ must come from cbp bits instead. */
6569 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6570 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6572 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average luma/chroma QPs per neighbouring field MB. */
6579 mb_qp = s->current_picture.qscale_table[mb_xy];
6580 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6581 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6582 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6583 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6584 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6585 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6586 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6587 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6588 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6589 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6590 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6591 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6594 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6595 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6596 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6597 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6598 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Filter both directions; if the MBAFF special case already handled
 * the first vertical edge, it is skipped for dir 0. */
6602 for( dir = 0; dir < 2; dir++ )
6603 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6605 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6606 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Decode all macroblocks of one slice (thread entry point).
 * Chooses the CABAC or CAVLC macroblock loop according to the PPS,
 * reports decoded regions to the error resilience layer via
 * ff_er_add_slice(), and draws completed rows. Returns 0 on a clean
 * slice end, -1 on error.
 *
 * Fix: the original line `if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){`
 * contained mis-encoded '?' characters that broke the expression; restored
 * to match the sibling checks in the same region.
 *
 * NOTE(review): interior lines are missing from this excerpt; comments
 * only describe visible code. */
6610 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6611 H264Context *h = *(void**)arg;
6612 MpegEncContext * const s = &h->s;
/* With data partitioning, END/ERROR flags are masked differently. */
6613 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6617 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6618 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6620 if( h->pps.cabac ) {
/* CABAC data is byte-aligned after the slice header. */
6624 align_get_bits( &s->gb );
6627 ff_init_cabac_states( &h->cabac);
6628 ff_init_cabac_decoder( &h->cabac,
6629 s->gb.buffer + get_bits_count(&s->gb)/8,
6630 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6631 /* calculate pre-state */
6632 for( i= 0; i < 460; i++ ) {
6634 if( h->slice_type_nos == FF_I_TYPE )
6635 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6637 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack (state, MPS) into one byte per context. */
6640 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6642 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6647 int ret = decode_mb_cabac(h);
6649 //STOP_TIMER("decode_mb_cabac")
6651 if(ret>=0) hl_decode_mb(h);
6653 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6656 ret = decode_mb_cabac(h);
6658 if(ret>=0) hl_decode_mb(h);
6661 eos = get_cabac_terminate( &h->cabac );
/* Allow up to 2 bytes of overread before declaring an error. */
6663 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6664 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6665 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6669 if( ++s->mb_x >= s->mb_width ) {
6671 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6673 if(FIELD_OR_MBAFF_PICTURE) {
6678 if( eos || s->mb_y >= s->mb_height ) {
6679 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6680 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop. */
6687 int ret = decode_mb_cavlc(h);
6689 if(ret>=0) hl_decode_mb(h);
6691 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6693 ret = decode_mb_cavlc(h);
6695 if(ret>=0) hl_decode_mb(h);
6700 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6701 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6706 if(++s->mb_x >= s->mb_width){
6708 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6710 if(FIELD_OR_MBAFF_PICTURE) {
6713 if(s->mb_y >= s->mb_height){
6714 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consumed bitstream => clean slice end; otherwise error. */
6716 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6728 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6729 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6730 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6731 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6744 for(;s->mb_y < s->mb_height; s->mb_y++){
6745 for(;s->mb_x < s->mb_width; s->mb_x++){
6746 int ret= decode_mb(h);
6751 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6752 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6757 if(++s->mb_x >= s->mb_width){
6759 if(++s->mb_y >= s->mb_height){
6760 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6761 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6765 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6772 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6773 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6774 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6778 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6785 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6788 return -1; //not reached
/* Parse a picture timing SEI message (H.264 spec D.1.2/D.2.2):
 * CPB removal / DPB output delays when HRD parameters are present,
 * then pic_struct and the optional per-timestamp clock fields.
 * Most timestamp sub-fields are skipped; only ct_type is accumulated. */
6791 static int decode_picture_timing(H264Context *h){
6792 MpegEncContext * const s = &h->s;
6793 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
/* Field lengths come from the HRD parameters in the active SPS. */
6794 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6795 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6797 if(h->sps.pic_struct_present_flag){
6798 unsigned int i, num_clock_ts;
6799 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Values above frame tripling are reserved. */
6802 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6805 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6807 for (i = 0 ; i < num_clock_ts ; i++){
6808 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6809 unsigned int full_timestamp_flag;
6810 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6811 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6812 skip_bits(&s->gb, 5); /* counting_type */
6813 full_timestamp_flag = get_bits(&s->gb, 1);
6814 skip_bits(&s->gb, 1); /* discontinuity_flag */
6815 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6816 skip_bits(&s->gb, 8); /* n_frames */
6817 if(full_timestamp_flag){
6818 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6819 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6820 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Without full_timestamp_flag each time component is optional. */
6822 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6823 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6824 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6825 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6826 if(get_bits(&s->gb, 1)) /* hours_flag */
6827 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6831 if(h->sps.time_offset_length > 0)
6832 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered user-data SEI payload of `size` bytes.
 * The first 16 bytes are the UUID; the text after it is scanned for an
 * x264 version banner to record h->x264_build (used for bug
 * workarounds). Remaining bytes beyond the local buffer are skipped. */
6839 static int decode_unregistered_user_data(H264Context *h, int size){
6840 MpegEncContext * const s = &h->s;
6841 uint8_t user_data[16+256];
/* Copy at most sizeof(user_data)-1 bytes so it stays NUL-terminable. */
6847 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6848 user_data[i]= get_bits(&s->gb, 8);
6852 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6853 if(e==1 && build>=0)
6854 h->x264_build= build;
6856 if(s->avctx->debug & FF_DEBUG_BUGS)
6857 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes that did not fit in the local buffer. */
6860 skip_bits(&s->gb, 8);
/* Parse a recovery point SEI message: store the recovery frame count
 * and skip the remaining flags. */
6865 static int decode_recovery_point(H264Context *h){
6866 MpegEncContext * const s = &h->s;
6868 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6869 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Parse a buffering period SEI message (H.264 spec D.1.1): validate the
 * referenced SPS, then read initial CPB removal delays for the NAL and
 * VCL HRD parameter sets (offsets are skipped). */
6874 static int decode_buffering_period(H264Context *h){
6875 MpegEncContext * const s = &h->s;
6876 unsigned int sps_id;
6880 sps_id = get_ue_golomb_31(&s->gb);
6881 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6882 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6885 sps = h->sps_buffers[sps_id];
6887 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6888 if (sps->nal_hrd_parameters_present_flag) {
6889 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6890 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6891 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
/* VCL HRD values overwrite the NAL ones when both are present. */
6894 if (sps->vcl_hrd_parameters_present_flag) {
6895 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6896 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6897 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6901 h->sei_buffering_period_present = 1;
/* Top-level SEI NAL parser: iterate over SEI messages, decoding the
 * payload type and size (each coded as a sequence of 0xFF bytes plus a
 * final byte), dispatch to the type-specific parser, and skip unknown
 * payload types. */
6905 int ff_h264_decode_sei(H264Context *h){
6906 MpegEncContext * const s = &h->s;
6908 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum of 255s terminated by a byte < 255. */
6913 type+= show_bits(&s->gb, 8);
6914 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same encoding. */
6918 size+= show_bits(&s->gb, 8);
6919 }while(get_bits(&s->gb, 8) == 255);
6922 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6923 if(decode_picture_timing(h) < 0)
6926 case SEI_TYPE_USER_DATA_UNREGISTERED:
6927 if(decode_unregistered_user_data(h, size) < 0)
6930 case SEI_TYPE_RECOVERY_POINT:
6931 if(decode_recovery_point(h) < 0)
6934 case SEI_BUFFERING_PERIOD:
6935 if(decode_buffering_period(h) < 0)
/* Unknown payload type: skip the whole payload. */
6939 skip_bits(&s->gb, 8*size);
6942 //FIXME check bits here
6943 align_get_bits(&s->gb);
/* Parse hrd_parameters() (H.264 spec Annex E.1.2) into the SPS:
 * validates cpb_cnt, skips per-CPB rate/size/cbr fields, and stores the
 * delay field lengths needed later by the SEI parsers. Returns 0 on
 * success (negative on invalid cpb_cnt, per the visible check). */
6949 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6950 MpegEncContext * const s = &h->s;
6952 cpb_count = get_ue_golomb_31(&s->gb) + 1;
6954 if(cpb_count > 32U){
6955 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6959 get_bits(&s->gb, 4); /* bit_rate_scale */
6960 get_bits(&s->gb, 4); /* cpb_size_scale */
6961 for(i=0; i<cpb_count; i++){
6962 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6963 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6964 get_bits1(&s->gb); /* cbr_flag */
/* Lengths are coded minus 1. */
6966 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6967 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6968 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6969 sps->time_offset_length = get_bits(&s->gb, 5);
6970 sps->cpb_cnt = cpb_count;
/* Parse vui_parameters() (H.264 spec Annex E.1.1) into the SPS: sample
 * aspect ratio, video signal description, chroma sample location,
 * timing info, HRD parameters, pic_struct flag and the bitstream
 * restriction fields (of which only num_reorder_frames is kept). */
6974 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6975 MpegEncContext * const s = &h->s;
6976 int aspect_ratio_info_present_flag;
6977 unsigned int aspect_ratio_idc;
6979 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6981 if( aspect_ratio_info_present_flag ) {
6982 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow. */
6983 if( aspect_ratio_idc == EXTENDED_SAR ) {
6984 sps->sar.num= get_bits(&s->gb, 16);
6985 sps->sar.den= get_bits(&s->gb, 16);
6986 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6987 sps->sar= pixel_aspect[aspect_ratio_idc];
6989 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6996 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6998 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6999 get_bits1(&s->gb); /* overscan_appropriate_flag */
7002 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7003 get_bits(&s->gb, 3); /* video_format */
7004 get_bits1(&s->gb); /* video_full_range_flag */
7005 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7006 get_bits(&s->gb, 8); /* colour_primaries */
7007 get_bits(&s->gb, 8); /* transfer_characteristics */
7008 get_bits(&s->gb, 8); /* matrix_coefficients */
7012 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7013 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7014 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7017 sps->timing_info_present_flag = get_bits1(&s->gb);
7018 if(sps->timing_info_present_flag){
7019 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7020 sps->time_scale = get_bits_long(&s->gb, 32);
7021 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice: once for NAL, once for VCL. */
7024 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7025 if(sps->nal_hrd_parameters_present_flag)
7026 if(decode_hrd_parameters(h, sps) < 0)
7028 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7029 if(sps->vcl_hrd_parameters_present_flag)
7030 if(decode_hrd_parameters(h, sps) < 0)
7032 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7033 get_bits1(&s->gb); /* low_delay_hrd_flag */
7034 sps->pic_struct_present_flag = get_bits1(&s->gb);
7036 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7037 if(sps->bitstream_restriction_flag){
7038 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7039 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7040 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7041 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7042 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7043 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7044 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* Sanity bound: the DPB cannot reorder more than 16 frames. */
7046 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7047 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one scaling list of `size` (16 or 64) entries into `factors`.
 * If the list is absent, copy `fallback_list`; if present but the very
 * first delta yields 0, copy the JVT default `jvt_list`. Otherwise the
 * deltas are accumulated in zig-zag order, with a 0 delta meaning
 * "repeat the previous value". */
7055 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7056 const uint8_t *jvt_list, const uint8_t *fallback_list){
7057 MpegEncContext * const s = &h->s;
7058 int i, last = 8, next = 8;
/* 4x4 lists use the field/frame zig-zag, 8x8 the standard one. */
7059 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7060 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7061 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7063 for(i=0;i<size;i++){
7065 next = (last + get_se_golomb(&s->gb)) & 0xff;
7066 if(!i && !next){ /* matrix not written, we use the preset one */
7067 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7070 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS or PPS. The
 * fallback for each list is the previously parsed list, the SPS list
 * (when parsing a PPS that follows an SPS with matrices), or the JVT
 * defaults — per the fall-back rules of the H.264 spec (7.4.2.1.1). */
7074 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7075 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7076 MpegEncContext * const s = &h->s;
/* PPS parsing may fall back to the SPS matrices if the SPS had any. */
7077 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7078 const uint8_t *fallback[4] = {
7079 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7080 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7081 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7082 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7084 if(get_bits1(&s->gb)){
7085 sps->scaling_matrix_present |= is_sps;
7086 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7087 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7088 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7089 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7090 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7091 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only when the 8x8 transform can be used. */
7092 if(is_sps || pps->transform_8x8_mode){
7093 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7094 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL (H.264 spec 7.3.2.1) into a newly
 * allocated SPS, validating the id, POC parameters, reference-frame
 * count and picture dimensions, then store it in h->sps_buffers.
 * NOTE(review): interior lines (several error paths and the function
 * tail) are missing from this excerpt. */
7099 int ff_h264_decode_seq_parameter_set(H264Context *h){
7100 MpegEncContext * const s = &h->s;
7101 int profile_idc, level_idc;
7102 unsigned int sps_id;
7106 profile_idc= get_bits(&s->gb, 8);
7107 get_bits1(&s->gb); //constraint_set0_flag
7108 get_bits1(&s->gb); //constraint_set1_flag
7109 get_bits1(&s->gb); //constraint_set2_flag
7110 get_bits1(&s->gb); //constraint_set3_flag
7111 get_bits(&s->gb, 4); // reserved
7112 level_idc= get_bits(&s->gb, 8);
7113 sps_id= get_ue_golomb_31(&s->gb);
7115 if(sps_id >= MAX_SPS_COUNT) {
7116 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7119 sps= av_mallocz(sizeof(SPS));
7123 sps->profile_idc= profile_idc;
7124 sps->level_idc= level_idc;
/* Default: flat scaling matrices (all 16). */
7126 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7127 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7128 sps->scaling_matrix_present = 0;
7130 if(sps->profile_idc >= 100){ //high profile
7131 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7132 if(sps->chroma_format_idc == 3)
7133 sps->residual_color_transform_flag = get_bits1(&s->gb);
7134 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7135 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7136 sps->transform_bypass = get_bits1(&s->gb);
7137 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are always 4:2:0. */
7139 sps->chroma_format_idc= 1;
7142 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7143 sps->poc_type= get_ue_golomb_31(&s->gb);
7145 if(sps->poc_type == 0){ //FIXME #define
7146 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7147 } else if(sps->poc_type == 1){//FIXME #define
7148 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7149 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7150 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7151 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7153 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7154 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7158 for(i=0; i<sps->poc_cycle_length; i++)
7159 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7160 }else if(sps->poc_type != 2){
7161 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7165 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7166 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7167 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7170 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
/* Dimensions are coded in macroblock units, minus 1. */
7171 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7172 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7173 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7174 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7175 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7179 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7180 if(!sps->frame_mbs_only_flag)
7181 sps->mb_aff= get_bits1(&s->gb);
7185 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7187 #ifndef ALLOW_INTERLACE
7189 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7191 sps->crop= get_bits1(&s->gb);
7193 sps->crop_left = get_ue_golomb(&s->gb);
7194 sps->crop_right = get_ue_golomb(&s->gb);
7195 sps->crop_top = get_ue_golomb(&s->gb);
7196 sps->crop_bottom= get_ue_golomb(&s->gb);
7197 if(sps->crop_left || sps->crop_top){
7198 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7200 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7201 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7207 sps->crop_bottom= 0;
7210 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7211 if( sps->vui_parameters_present_flag )
7212 decode_vui_parameters(h, sps);
7214 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7215 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7216 sps_id, sps->profile_idc, sps->level_idc,
7218 sps->ref_frame_count,
7219 sps->mb_width, sps->mb_height,
7220 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7221 sps->direct_8x8_inference_flag ? "8B8" : "",
7222 sps->crop_left, sps->crop_right,
7223 sps->crop_top, sps->crop_bottom,
7224 sps->vui_parameters_present_flag ? "VUI" : "",
7225 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7226 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7227 sps->timing_info_present_flag ? sps->time_scale : 0
/* Replace any previously stored SPS with the same id. */
7231 av_free(h->sps_buffers[sps_id]);
7232 h->sps_buffers[sps_id]= sps;
/* Build the luma-QP -> chroma-QP lookup table for chroma plane `t`,
 * applying the PPS chroma_qp_index_offset `index` and clipping the
 * shifted QP to the valid 0..51 range. */
build_qp_table(PPS *pps, int t, int index)
7244 for(i = 0; i < 52; i++)
7245 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set RBSP from s->gb and store it in
 * h->pps_buffers[pps_id].  bit_length is the RBSP payload size in bits
 * (used to detect the optional trailing PPS extension fields).
 * Returns 0 on success, negative on error (error paths elided here). */
7248 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7249 MpegEncContext * const s = &h->s;
7250 unsigned int pps_id= get_ue_golomb(&s->gb);
// Reject ids outside the fixed PPS buffer array.
7253 if(pps_id >= MAX_PPS_COUNT) {
7254 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7258 pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7261 pps->sps_id= get_ue_golomb_31(&s->gb);
7262 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7263 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7267 pps->cabac= get_bits1(&s->gb);
7268 pps->pic_order_present= get_bits1(&s->gb);
7269 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// FMO (slice groups) is parsed far enough to skip but not supported;
// the table below mirrors the spec syntax for the elided cases.
7270 if(pps->slice_group_count > 1 ){
7271 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7272 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7273 switch(pps->mb_slice_group_map_type){
7276 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7277 | run_length[ i ] |1 |ue(v) |
7282 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7284 | top_left_mb[ i ] |1 |ue(v) |
7285 | bottom_right_mb[ i ] |1 |ue(v) |
7293 | slice_group_change_direction_flag |1 |u(1) |
7294 | slice_group_change_rate_minus1 |1 |ue(v) |
7299 | slice_group_id_cnt_minus1 |1 |ue(v) |
7300 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7302 | slice_group_id[ i ] |1 |u(v) |
// Default reference list sizes; at most 32 entries per list.
7307 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7308 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7309 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7310 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7314 pps->weighted_pred= get_bits1(&s->gb);
7315 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// init_qp/init_qs are coded as signed offsets from 26.
7316 pps->init_qp= get_se_golomb(&s->gb) + 26;
7317 pps->init_qs= get_se_golomb(&s->gb) + 26;
7318 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7319 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7320 pps->constrained_intra_pred= get_bits1(&s->gb);
7321 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7323 pps->transform_8x8_mode= 0;
7324 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; the PPS may override them below.
7325 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7326 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// More RBSP bits left => High-profile PPS extension is present.
7328 if(get_bits_count(&s->gb) < bit_length){
7329 pps->transform_8x8_mode= get_bits1(&s->gb);
7330 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7331 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// No extension: Cr reuses the Cb offset.
7333 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7336 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7337 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7338 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
// NOTE(review): this sets chroma_qp_diff on h->pps (the currently
// *active* PPS copy), not on the pps object being parsed here —
// looks like it should be pps->chroma_qp_diff; verify before changing.
7339 h->pps.chroma_qp_diff= 1;
7341 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7342 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7343 pps_id, pps->sps_id,
7344 pps->cabac ? "CABAC" : "CAVLC",
7345 pps->slice_group_count,
7346 pps->ref_count[0], pps->ref_count[1],
7347 pps->weighted_pred ? "weighted" : "",
7348 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7349 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7350 pps->constrained_intra_pred ? "CONSTR" : "",
7351 pps->redundant_pic_cnt_present ? "REDU" : "",
7352 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7356 av_free(h->pps_buffers[pps_id]);
7357 h->pps_buffers[pps_id]= pps;
7365 * Call decode_slice() for each context.
7367 * @param h h264 master context
7368 * @param context_count number of contexts to execute
7370 static void execute_decode_slices(H264Context *h, int context_count){
7371 MpegEncContext * const s = &h->s;
7372 AVCodecContext * const avctx= s->avctx;
// Hardware-accelerated paths bypass the software slice decoder
// (the early-return bodies are elided in this excerpt).
7376 if (s->avctx->hwaccel)
7378 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
// Single-context fast path: decode directly on the master context.
7380 if(context_count == 1) {
7381 decode_slice(avctx, &h);
// Multi-context path: reset per-thread error state, then run all
// slice contexts through avctx->execute() (frame-level threading).
7383 for(i = 1; i < context_count; i++) {
7384 hx = h->thread_context[i];
7385 hx->s.error_recognition = avctx->error_recognition;
7386 hx->s.error_count = 0;
7389 avctx->execute(avctx, (void *)decode_slice,
7390 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7392 /* pull back stuff from slices to master context */
7393 hx = h->thread_context[context_count - 1];
7394 s->mb_x = hx->s.mb_x;
7395 s->mb_y = hx->s.mb_y;
7396 s->dropable = hx->s.dropable;
7397 s->picture_structure = hx->s.picture_structure;
// Aggregate error counts from all worker contexts into the master.
7398 for(i = 1; i < context_count; i++)
7399 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each one:
 * slices are parsed (and possibly decoded in parallel contexts),
 * SPS/PPS/SEI are fed to their respective parsers.  Handles both
 * AVC (length-prefixed, h->is_avc) and Annex-B (start-code) framing.
 * Returns the number of bytes consumed (error paths elided here). */
7404 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7405 MpegEncContext * const s = &h->s;
7406 AVCodecContext * const avctx= s->avctx;
7408 H264Context *hx; ///< thread context
7409 int context_count = 0;
7411 h->max_contexts = avctx->thread_count;
// Debug dump of the first bytes of the buffer (guarded upstream,
// guard elided in this excerpt).
7414 for(i=0; i<50; i++){
7415 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Outside CHUNKS mode each call starts a fresh access unit.
7418 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7419 h->current_slice = 0;
7420 if (!s->first_field)
7421 s->current_picture_ptr= NULL;
7434 if(buf_index >= buf_size) break;
// AVC framing: read the big-endian NAL size prefix.
7436 for(i = 0; i < h->nal_length_size; i++)
7437 nalsize = (nalsize << 8) | buf[buf_index++];
7438 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7443 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7448 // start code prefix search
7449 for(; buf_index + 3 < buf_size; buf_index++){
7450 // This should always succeed in the first iteration.
7451 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7455 if(buf_index+3 >= buf_size) break;
// Parse the NAL into the next free slice context (unescape RBSP).
7460 hx = h->thread_context[context_count];
7462 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7463 if (ptr==NULL || dst_length < 0){
// NOTE(review): ptr[dst_length - 1] is evaluated before the
// dst_length > 0 guard; when dst_length == 0 this reads one byte
// before the buffer.  The operands should be swapped.
7466 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7468 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7470 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7471 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// In AVC mode warn if the NAL consumed fewer bytes than declared;
// only escalate to ERROR when the trailing bytes are non-zero.
7474 if (h->is_avc && (nalsize != consumed)){
7475 int i, debug_level = AV_LOG_DEBUG;
7476 for (i = consumed; i < nalsize; i++)
7477 if (buf[buf_index+i])
7478 debug_level = AV_LOG_ERROR;
7479 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7483 buf_index += consumed;
// Skip non-reference NALs when hurrying up / skipping non-ref frames.
7485 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7486 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
// Dispatch on NAL type (several case labels elided in this excerpt).
7491 switch(hx->nal_unit_type){
7493 if (h->nal_unit_type != NAL_IDR_SLICE) {
7494 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7497 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular (non-partitioned) slice: single bitstream reader.
7499 init_get_bits(&hx->s.gb, ptr, bit_length);
7501 hx->inter_gb_ptr= &hx->s.gb;
7502 hx->s.data_partitioning = 0;
7504 if((err = decode_slice_header(hx, h)))
7507 if (s->avctx->hwaccel && h->current_slice == 1) {
7508 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
// IDR or recovery-point SEI marks the output picture as a keyframe.
7512 s->current_picture_ptr->key_frame |=
7513 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7514 (h->sei_recovery_frame_cnt >= 0);
// Only queue the slice for decoding if none of the skip options apply.
7515 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7516 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7517 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7518 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7519 && avctx->skip_frame < AVDISCARD_ALL){
7520 if(avctx->hwaccel) {
7521 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
// VDPAU wants the raw NAL with an Annex-B start code prepended.
7524 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7525 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7526 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7527 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
// Data-partitioned slices: partition A carries the header,
// B/C get their own bitstream readers.
7533 init_get_bits(&hx->s.gb, ptr, bit_length);
7535 hx->inter_gb_ptr= NULL;
7536 hx->s.data_partitioning = 1;
7538 err = decode_slice_header(hx, h);
7541 init_get_bits(&hx->intra_gb, ptr, bit_length);
7542 hx->intra_gb_ptr= &hx->intra_gb;
7545 init_get_bits(&hx->inter_gb, ptr, bit_length);
7546 hx->inter_gb_ptr= &hx->inter_gb;
7548 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7549 && s->context_initialized
7551 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7552 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7553 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7554 && avctx->skip_frame < AVDISCARD_ALL)
// Parameter-set / SEI NALs are parsed on the master context.
7558 init_get_bits(&s->gb, ptr, bit_length);
7559 ff_h264_decode_sei(h);
7562 init_get_bits(&s->gb, ptr, bit_length);
7563 ff_h264_decode_seq_parameter_set(h);
7565 if(s->flags& CODEC_FLAG_LOW_DELAY)
7568 if(avctx->has_b_frames < 2)
7569 avctx->has_b_frames= !s->low_delay;
7572 init_get_bits(&s->gb, ptr, bit_length);
7574 ff_h264_decode_picture_parameter_set(h, bit_length);
// NAL types that are deliberately ignored.
7578 case NAL_END_SEQUENCE:
7579 case NAL_END_STREAM:
7580 case NAL_FILLER_DATA:
7582 case NAL_AUXILIARY_SLICE:
7585 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush accumulated slice contexts once all of them are filled.
7588 if(context_count == h->max_contexts) {
7589 execute_decode_slices(h, context_count);
7594 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7596 /* Slice could not be decoded in parallel mode, copy down
7597 * NAL unit stuff to context 0 and restart. Note that
7598 * rbsp_buffer is not transferred, but since we no longer
7599 * run in parallel mode this should not be an issue. */
7600 h->nal_unit_type = hx->nal_unit_type;
7601 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any slices still pending at end of buffer.
7607 execute_decode_slices(h, context_count);
7612 * returns the number of bytes consumed for building the current frame
/* pos is the byte position reached by the NAL parser; it is clamped to
 * buf_size when within 10 bytes of the end (the `return pos;` tail of
 * this function is elided in this excerpt). */
7614 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7615 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7616 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level AVCodec.decode callback: decodes one packet worth of NAL
 * units and, respecting the B-frame reorder delay, outputs at most one
 * picture into *data.  Returns bytes consumed (error paths elided). */
7621 static int decode_frame(AVCodecContext *avctx,
7622 void *data, int *data_size,
7625 const uint8_t *buf = avpkt->data;
7626 int buf_size = avpkt->size;
7627 H264Context *h = avctx->priv_data;
7628 MpegEncContext *s = &h->s;
7629 AVFrame *pict = data;
7632 s->flags= avctx->flags;
7633 s->flags2= avctx->flags2;
7635 /* end of stream, output what is still in the buffers */
7636 if (buf_size == 0) {
7640 //FIXME factorize this with the output code below
// Flush path: pick the delayed picture with the smallest poc
// (stopping at a keyframe / zero-poc boundary) and shift the queue.
7641 out = h->delayed_pic[0];
7643 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7644 if(h->delayed_pic[i]->poc < out->poc){
7645 out = h->delayed_pic[i];
7649 for(i=out_idx; h->delayed_pic[i]; i++)
7650 h->delayed_pic[i] = h->delayed_pic[i+1];
7653 *data_size = sizeof(AVFrame);
7654 *pict= *(AVFrame*)out;
// One-time parse of the avcC extradata (AVC configuration record).
7660 if(h->is_avc && !h->got_avcC) {
7661 int i, cnt, nalsize;
7662 unsigned char *p = avctx->extradata;
7663 if(avctx->extradata_size < 7) {
7664 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7668 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7671 /* sps and pps in the avcC always have length coded with 2 bytes,
7672 so put a fake nal_length_size = 2 while parsing them */
7673 h->nal_length_size = 2;
7674 // Decode sps from avcC
7675 cnt = *(p+5) & 0x1f; // Number of sps
7677 for (i = 0; i < cnt; i++) {
7678 nalsize = AV_RB16(p) + 2;
7679 if(decode_nal_units(h, p, nalsize) < 0) {
7680 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7685 // Decode pps from avcC
7686 cnt = *(p++); // Number of pps
7687 for (i = 0; i < cnt; i++) {
7688 nalsize = AV_RB16(p) + 2;
7689 if(decode_nal_units(h, p, nalsize) != nalsize) {
7690 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7695 // Now store right nal length size, that will be use to parse all other nals
7696 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7697 // Do not reparse avcC
// Annex-B extradata (non-avcC) is fed through the NAL parser once.
7701 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7702 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7707 buf_index=decode_nal_units(h, buf, buf_size);
7711 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7712 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7713 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// A full picture has been decoded (or CHUNKS mode finished a frame).
7717 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7718 Picture *out = s->current_picture_ptr;
7719 Picture *cur = s->current_picture_ptr;
7720 int i, pics, cross_idr, out_of_order, out_idx;
7724 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7725 s->current_picture_ptr->pict_type= s->pict_type;
7727 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7728 ff_vdpau_h264_set_reference_frames(s);
// Apply memory-management control ops and roll POC state forward.
7731 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7732 h->prev_poc_msb= h->poc_msb;
7733 h->prev_poc_lsb= h->poc_lsb;
7735 h->prev_frame_num_offset= h->frame_num_offset;
7736 h->prev_frame_num= h->frame_num;
7738 if (avctx->hwaccel) {
7739 if (avctx->hwaccel->end_frame(avctx) < 0)
7740 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
7743 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7744 ff_vdpau_h264_picture_complete(s);
7747 * FIXME: Error handling code does not seem to support interlaced
7748 * when slices span multiple rows
7749 * The ff_er_add_slice calls don't work right for bottom
7750 * fields; they cause massive erroneous error concealing
7751 * Error marking covers both fields (top and bottom).
7752 * This causes a mismatched s->error_count
7753 * and a bad error table. Further, the error count goes to
7754 * INT_MAX when called for bottom field, because mb_y is
7755 * past end by one (callers fault) and resync_mb_y != 0
7756 * causes problems for the first MB line, too.
// Only half of an interlaced frame decoded so far: no output yet.
7763 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7764 /* Wait for second field. */
7768 cur->repeat_pict = 0;
7770 /* Signal interlacing information externally. */
7771 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7773 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7775 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7777 if(h->sps.pic_struct_present_flag){
7778 switch (h->sei_pic_struct)
7780 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7781 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7782 // Signal the possibility of telecined film externally (pic_struct 5,6)
7783 // From these hints, let the applications decide if they apply deinterlacing.
7784 cur->repeat_pict = 1;
7786 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7787 // Force progressive here, as doubling interlaced frame is a bad idea.
7788 cur->interlaced_frame = 0;
7789 cur->repeat_pict = 2;
7791 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7792 cur->interlaced_frame = 0;
7793 cur->repeat_pict = 4;
7797 /* Derive interlacing flag from used decoding process. */
7798 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7801 if (cur->field_poc[0] != cur->field_poc[1]){
7802 /* Derive top_field_first from field pocs. */
7803 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7805 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7806 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7807 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7808 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7809 cur->top_field_first = 1;
7811 cur->top_field_first = 0;
7813 /* Most likely progressive */
7814 cur->top_field_first = 0;
7818 //FIXME do something with unavailable reference frames
7820 /* Sort B-frames into display order */
// Grow the reorder delay from the bitstream restriction, or assume
// the worst case in strict mode when no restriction is signalled.
7822 if(h->sps.bitstream_restriction_flag
7823 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7824 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7828 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7829 && !h->sps.bitstream_restriction_flag){
7830 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// Append the new picture to the delayed queue, keeping it referenced
// for delayed output if it is not already a reference picture.
7835 while(h->delayed_pic[pics]) pics++;
7837 assert(pics <= MAX_DELAYED_PIC_COUNT);
7839 h->delayed_pic[pics++] = cur;
7840 if(cur->reference == 0)
7841 cur->reference = DELAYED_PIC_REF;
// Select the lowest-poc delayed picture as output candidate.
7843 out = h->delayed_pic[0];
7845 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7846 if(h->delayed_pic[i]->poc < out->poc){
7847 out = h->delayed_pic[i];
7850 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7852 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Adaptively bump has_b_frames when output would be out of order.
7854 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7856 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7858 ((!cross_idr && out->poc > h->outputed_poc + 2)
7859 || cur->pict_type == FF_B_TYPE)))
7862 s->avctx->has_b_frames++;
// Emit the candidate once enough pictures are queued (or forced
// out-of-order); only in-order output is reported to the caller.
7865 if(out_of_order || pics > s->avctx->has_b_frames){
7866 out->reference &= ~DELAYED_PIC_REF;
7867 for(i=out_idx; h->delayed_pic[i]; i++)
7868 h->delayed_pic[i] = h->delayed_pic[i+1];
7870 if(!out_of_order && pics > s->avctx->has_b_frames){
7871 *data_size = sizeof(AVFrame);
7873 h->outputed_poc = out->poc;
7874 *pict= *(AVFrame*)out;
7876 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7881 assert(pict->data[0] || !*data_size);
7882 ff_print_debug_info(s, pict);
7883 //printf("out %d\n", (int)pict->data[0]);
7886 /* Return the Picture timestamp as the frame number */
7887 /* we subtract 1 because it is added on utils.c */
7888 avctx->frame_number = s->picture_number - 1;
7890 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with the availability of the macroblocks
 * neighbouring the current one (same slice check via slice_table):
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left; [4]/[5] are
 * constants.  The branch for the top picture row is elided here. */
7893 static inline void fill_mb_avail(H264Context *h){
7894 MpegEncContext * const s = &h->s;
7895 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7898 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7899 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7900 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7906 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7907 h->mb_avail[4]= 1; //FIXME move out
7908 h->mb_avail[5]= 0; //FIXME move out
/* Built-in self test (the function opening and several sections are
 * elided in this excerpt).  It exercises: unsigned/signed Exp-Golomb
 * write/read round trips, the 4x4 (I)DCT round trip with a crude
 * quantizer error measurement, and NAL escaping/unescaping. */
7916 #define SIZE (COUNT*40)
7922 // int int_temp[10000];
7924 AVCodecContext avctx;
7926 dsputil_init(&dsp, &avctx);
// --- Exp-Golomb round trip: write COUNT unsigned codes... ---
7928 init_put_bits(&pb, temp, SIZE);
7929 printf("testing unsigned exp golomb\n");
7930 for(i=0; i<COUNT; i++){
7932 set_ue_golomb(&pb, i);
7933 STOP_TIMER("set_ue_golomb");
7935 flush_put_bits(&pb);
// ...then read them back and compare against the written index.
7937 init_get_bits(&gb, temp, 8*SIZE);
7938 for(i=0; i<COUNT; i++){
7941 s= show_bits(&gb, 24);
7944 j= get_ue_golomb(&gb);
7946 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7949 STOP_TIMER("get_ue_golomb");
// --- Same round trip for signed codes centered on zero. ---
7953 init_put_bits(&pb, temp, SIZE);
7954 printf("testing signed exp golomb\n");
7955 for(i=0; i<COUNT; i++){
7957 set_se_golomb(&pb, i - COUNT/2);
7958 STOP_TIMER("set_se_golomb");
7960 flush_put_bits(&pb);
7962 init_get_bits(&gb, temp, 8*SIZE);
7963 for(i=0; i<COUNT; i++){
7966 s= show_bits(&gb, 24);
7969 j= get_se_golomb(&gb);
7970 if(j != i - COUNT/2){
7971 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7974 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT round trip on random blocks; accumulate the
// absolute reconstruction error and track the maximum. ---
7978 printf("testing 4x4 (I)DCT\n");
7981 uint8_t src[16], ref[16];
7982 uint64_t error= 0, max_error=0;
7984 for(i=0; i<COUNT; i++){
7986 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7987 for(j=0; j<16; j++){
7988 ref[j]= random()%255;
7989 src[j]= random()%255;
7992 h264_diff_dct_c(block, src, ref, 4);
// Crude quantize/dequantize approximation (comment in original).
7995 for(j=0; j<16; j++){
7996 // printf("%d ", block[j]);
7997 block[j]= block[j]*4;
7998 if(j&1) block[j]= (block[j]*4 + 2)/5;
7999 if(j&4) block[j]= (block[j]*4 + 2)/5;
8003 s->dsp.h264_idct_add(ref, block, 4);
8004 /* for(j=0; j<16; j++){
8005 printf("%d ", ref[j]);
8009 for(j=0; j<16; j++){
8010 int diff= FFABS(src[j] - ref[j]);
8013 max_error= FFMAX(max_error, diff);
8016 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8017 printf("testing quantizer\n");
8018 for(qp=0; qp<52; qp++){
8020 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape a random bitstream (with injected zero runs),
// unescape it and verify length, consumed count and content. ---
8023 printf("Testing NAL layer\n");
8025 uint8_t bitstream[COUNT];
8026 uint8_t nal[COUNT*2];
8028 memset(&h, 0, sizeof(H264Context));
8030 for(i=0; i<COUNT; i++){
8038 for(j=0; j<COUNT; j++){
8039 bitstream[j]= (random() % 255) + 1;
8042 for(j=0; j<zeros; j++){
8043 int pos= random() % COUNT;
8044 while(bitstream[pos] == 0){
8053 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8055 printf("encoding failed\n");
8059 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8063 if(out_length != COUNT){
8064 printf("incorrect length %d %d\n", out_length, COUNT);
8068 if(consumed != nal_length){
8069 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8073 if(memcmp(bitstream, out, COUNT)){
8074 printf("mismatch\n");
8080 printf("Testing RBSP\n");
/* Release all per-context allocations: the two RBSP unescape buffers,
 * the decoder tables, and every stored SPS/PPS.  Safe on buffers that
 * were never allocated (av_freep on NULL slots is a no-op). */
8088 av_cold void ff_h264_free_context(H264Context *h)
8092 av_freep(&h->rbsp_buffer[0]);
8093 av_freep(&h->rbsp_buffer[1]);
8094 free_tables(h); //FIXME cleanup init stuff perhaps
8096 for(i = 0; i < MAX_SPS_COUNT; i++)
8097 av_freep(h->sps_buffers + i);
8099 for(i = 0; i < MAX_PPS_COUNT; i++)
8100 av_freep(h->pps_buffers + i);
/* AVCodec.close callback: tear down the H264Context via
 * ff_h264_free_context() (remaining cleanup lines elided here). */
8103 static av_cold int decode_end(AVCodecContext *avctx)
8105 H264Context *h = avctx->priv_data;
8106 MpegEncContext *s = &h->s;
8108 ff_h264_free_context(h);
8112 // memset(h, 0, sizeof(H264Context));
/* Registration entry for the software H.264 decoder (several struct
 * fields are elided in this excerpt).  CODEC_CAP_DELAY is required
 * because pictures are output with B-frame reorder delay. */
8118 AVCodec h264_decoder = {
8122 sizeof(H264Context),
8127 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8129 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8130 .pix_fmts= ff_hwaccel_pixfmt_list_420,
8133 #if CONFIG_H264_VDPAU_DECODER
/* Registration entry for the VDPAU-accelerated variant; differs from
 * the software decoder by CODEC_CAP_HWACCEL_VDPAU (fields elided). */
8134 AVCodec h264_vdpau_decoder = {
8138 sizeof(H264Context),
8143 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8145 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8149 #if CONFIG_SVQ3_DECODER