2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
41 #include "x86/h264_i386.h"
48 * Value of Picture.reference when Picture is not a reference picture, but
49 * is held for delayed output.
51 #define DELAYED_PIC_REF 4
/* CAVLC decode tables: coeff_token (4 variants selected by the predicted
 * number of coefficients), chroma-DC coeff_token, total_zeros (15 variants),
 * chroma-DC total_zeros (3 variants) and run_before (6 variants plus the
 * shared run7 table).  The *_size constants record the number of entries
 * backing each statically allocated VLC_TYPE table.
 * NOTE(review): the code that initializes these VLCs is not in this chunk. */
53 static VLC coeff_token_vlc[4];
54 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
55 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
57 static VLC chroma_dc_coeff_token_vlc;
58 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
59 static const int chroma_dc_coeff_token_vlc_table_size = 256;
61 static VLC total_zeros_vlc[15];
62 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
63 static const int total_zeros_vlc_tables_size = 512;
65 static VLC chroma_dc_total_zeros_vlc[3];
66 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
67 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
69 static VLC run_vlc[6];
70 static VLC_TYPE run_vlc_tables[6][8][2];
71 static const int run_vlc_tables_size = 8;
74 static VLC_TYPE run7_vlc_table[96][2];
75 static const int run7_vlc_table_size = 96;
/* Forward declarations: SVQ3 dequant/idct helpers and the in-loop
 * deblocking filter (full and fast variants), plus long-term ref removal. */
77 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
78 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
79 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit halves (a, b) into one 32-bit word, with the half order
 * swapped on big-endian hosts so that byte-wise reinterpretation matches.
 * NOTE(review): the `#else` / `#endif` / closing-brace lines of this
 * function are missing from this listing (orig. lines 86, 88-89).
 * NOTE(review): `a<<16` on a negative int is formally undefined in C;
 * this relies on the usual 2's-complement wraparound. */
83 static av_always_inline uint32_t pack16to32(int a, int b){
84 #ifdef WORDS_BIGENDIAN
85 return (b&0xFFFF) + (a<<16);
87 return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 and div6[q] == q / 6 for the H.264 QP range 0..51,
 * precomputed so dequant scale lookups avoid runtime division/modulo. */
91 static const uint8_t rem6[52]={
92 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
95 static const uint8_t div6[52]={
96 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Left-neighbour 4x4-block index remap tables, one row per MBAFF
 * frame/field neighbour configuration (selected in fill_caches).
 * NOTE(review): the initializer data is missing from this listing. */
99 static const uint8_t left_block_options[4][8]={
/* Lookup table for fast CAVLC level decoding, indexed by the next
 * LEVEL_TAB_BITS bits of the bitstream; filled at init time (not shown). */
106 #define LEVEL_TAB_BITS 8
107 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* Fills the per-macroblock neighbour caches (intra4x4 pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd, direct flags) from
 * the already-decoded neighbouring macroblocks, including the MBAFF
 * field/frame neighbour remapping.  With for_deblock set, only the subset
 * needed by the loop filter is filled.
 * NOTE(review): this listing is missing interleaved lines (see the stale
 * per-line numbers), so several braces/else-branches are absent here. */
109 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
110 MpegEncContext * const s = &h->s;
111 const int mb_xy= h->mb_xy;
112 int topleft_xy, top_xy, topright_xy, left_xy[2];
113 int topleft_type, top_type, topright_type, left_type[2];
114 const uint8_t * left_block;
115 int topleft_partition= -1;
118 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120 //FIXME deblocking could skip the intra and nnz parts.
121 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
124 /* Wow, what a mess, why didn't they simplify the interlacing & intra
125 * stuff, I can't imagine that these complex rules are worth it. */
/* Non-MBAFF default neighbour indices: plain left/top/topleft/topright. */
127 topleft_xy = top_xy - 1;
128 topright_xy= top_xy + 1;
129 left_xy[1] = left_xy[0] = mb_xy-1;
130 left_block = left_block_options[0];
/* MBAFF: neighbours are addressed through the macroblock *pair*, adjusted
 * by the field/frame coding of the current and neighbouring pairs. */
132 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
133 const int top_pair_xy = pair_xy - s->mb_stride;
134 const int topleft_pair_xy = top_pair_xy - 1;
135 const int topright_pair_xy = top_pair_xy + 1;
136 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
137 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
138 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
139 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
140 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
141 const int bottom = (s->mb_y & 1);
142 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
144 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
145 top_xy -= s->mb_stride;
147 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
155 topright_xy -= s->mb_stride;
157 if (left_mb_field_flag != curr_mb_field_flag) {
158 left_xy[1] = left_xy[0] = pair_xy - 1;
159 if (curr_mb_field_flag) {
160 left_xy[1] += s->mb_stride;
161 left_block = left_block_options[3];
163 left_block= left_block_options[2 - bottom];
/* Publish the resolved neighbour indices for later use (loop filter etc.). */
168 h->top_mb_xy = top_xy;
169 h->left_mb_xy[0] = left_xy[0];
170 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: any decoded macroblock counts as a neighbour
 * (slice_table < 0xFFFF), not just same-slice ones. */
174 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
175 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
176 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
178 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 for(list=0; list<h->list_count; list++){
181 //These values were changed for ease of performing MC, we need to change them back
182 //FIXME maybe we can make MC and loop filter use the same values or prevent
183 //the MC code from changing ref_cache and rather use a temporary array.
184 if(USES_LIST(mb_type,list)){
185 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: only same-slice neighbours are available for prediction. */
195 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
196 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
197 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
198 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
199 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample availability bitmasks; with constrained intra
 * prediction only intra neighbours count as available. */
201 if(IS_INTRA(mb_type)){
202 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
203 h->topleft_samples_available=
204 h->top_samples_available=
205 h->left_samples_available= 0xFFFF;
206 h->topright_samples_available= 0xEEEA;
208 if(!(top_type & type_mask)){
209 h->topleft_samples_available= 0xB3FF;
210 h->top_samples_available= 0x33FF;
211 h->topright_samples_available= 0x26EA;
213 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
214 if(IS_INTERLACED(mb_type)){
215 if(!(left_type[0] & type_mask)){
216 h->topleft_samples_available&= 0xDFFF;
217 h->left_samples_available&= 0x5FFF;
219 if(!(left_type[1] & type_mask)){
220 h->topleft_samples_available&= 0xFF5F;
221 h->left_samples_available&= 0xFF5F;
224 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
225 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
226 assert(left_xy[0] == left_xy[1]);
227 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
228 h->topleft_samples_available&= 0xDF5F;
229 h->left_samples_available&= 0x5F5F;
233 if(!(left_type[0] & type_mask)){
234 h->topleft_samples_available&= 0xDF5F;
235 h->left_samples_available&= 0x5F5F;
239 if(!(topleft_type & type_mask))
240 h->topleft_samples_available&= 0x7FFF;
242 if(!(topright_type & type_mask))
243 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction-mode cache: import the bottom row of the top
 * neighbour and the right column of the left neighbour(s). */
245 if(IS_INTRA4x4(mb_type)){
246 if(IS_INTRA4x4(top_type)){
247 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
248 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
249 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
250 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
253 if(!(top_type & type_mask))
258 h->intra4x4_pred_mode_cache[4+8*0]=
259 h->intra4x4_pred_mode_cache[5+8*0]=
260 h->intra4x4_pred_mode_cache[6+8*0]=
261 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 if(IS_INTRA4x4(left_type[i])){
265 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
266 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
269 if(!(left_type[i] & type_mask))
274 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
275 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: neighbour rows/columns, or the 0/64 "unavailable"
 * fill value (64 marks unavailable; CABAC inter uses 0). */
291 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 h->non_zero_count_cache[4+8*0]=
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
313 h->non_zero_count_cache[1+8*3]=
314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
318 for (i=0; i<2; i++) {
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern, used for CABAC context selection. */
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion-vector / reference-index caches for inter and direct MBs. */
358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 for(list=0; list<h->list_count; list++){
361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
369 h->mv_cache_clean[list]= 0;
371 if(USES_LIST(top_type, list)){
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
407 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
410 if(USES_LIST(topleft_type, list)){
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
420 if(USES_LIST(topright_type, list)){
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
436 h->ref_cache[list][scan8[4 ]] =
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* Motion-vector-difference cache (CABAC only). */
445 /* XXX beurk, Load mvd */
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 if(USES_LIST(left_type[0], list)){
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 if(USES_LIST(left_type[1], list)){
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B-slices: direct-mode flag cache from top/left neighbours. */
480 if(h->slice_type_nos == FF_B_TYPE){
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500 if(IS_DIRECT(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of cached refs and vertical MV/mvd
 * components for each cached neighbour position (MAP_F2F is defined
 * twice: frame->field halves mv[1], field->frame doubles it). */
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 #define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
531 #define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for transform-size flag contexts. */
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Copies the current macroblock's intra4x4 prediction modes from the
 * scratch cache back into the frame-wide per-MB array (only the 7 entries
 * that later macroblocks will read as neighbours).
 * NOTE(review): the closing brace is missing from this listing. */
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
561 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remaps each cached intra4x4 mode through the `top`/`left` tables when the
 * corresponding neighbour samples are unavailable; a resulting -1 means the
 * bitstream requested a mode that needs missing samples, which is logged as
 * an error.  NOTE(review): the loop headers, error-return statements and
 * closing braces are missing from this listing. */
563 static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top edge: bit 0x8000 of top_samples_available covers the whole top row. */
569 if(!(h->top_samples_available&0x8000)){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left edge: one mask bit per 4x4 row. */
581 if((h->left_samples_available&0x8888)!=0x8888){
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 if(!(h->left_samples_available&mask[i])){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 } //FIXME cleanup like next
600 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Validates/remaps a 16x16-luma or chroma intra prediction mode against
 * neighbour availability; logs and (presumably, lines missing) returns an
 * error for out-of-range or unusable modes, otherwise returns the possibly
 * remapped mode. */
602 static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
612 if(!(h->top_samples_available&0x8000)){
615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
620 if((h->left_samples_available&0x8080) != 0x8080){
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
/* Only one of the two left field halves is available: use the special
 * ALZHEIMER_DC_* predictors that mix available left samples with top/128. */
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
635 * gets the predicted intra4x4 prediction mode.
/* Prediction is min(left, top) cached mode; a negative entry means an
 * unavailable neighbour, in which case DC_PRED is returned.
 * NOTE(review): the final `return min;` path is missing from this listing. */
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
/* Copies the current macroblock's non-zero-coefficient counts from the
 * scratch cache back to the frame-wide per-MB array (only the luma/chroma
 * entries later macroblocks read as neighbours). */
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
/* Chroma blocks (cb then cr). */
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
/* Averages the left and top cached nnz values (rounded up) when both
 * neighbours are available; the value 64 marks "unavailable" in the cache
 * (see fill_caches), hence the `i<64` test.  NOTE(review): the line
 * computing `i` from left+top is missing from this listing. */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Selects the "C" predictor (top-right block, falling back to top-left)
 * for median motion-vector prediction.  Returns the reference index of the
 * chosen block and stores its MV pointer in *C.  The MBAFF branches rescale
 * the neighbour's vertical MV / ref index via SET_DIAG_MV when the
 * neighbour's field/frame coding differs from the current MB's. */
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds a rescaled copy of the neighbour MV. */
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / fall-through path: use the cached top-right block, or the
 * top-left block when the top-right is unavailable. */
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744 * gets the predicted MV.
745 * @param n the block index
746 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747 * @param mx the x component of the predicted motion vector
748 * @param my the y component of the predicted motion vector
/* Median MV prediction from the left (A), top (B) and diagonal (C)
 * neighbours; when exactly one neighbour uses the same reference, its MV is
 * used directly (those branch bodies are missing from this listing). */
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
/* No match: use A if it is the only available neighbour, else the median. */
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800 * gets the directionally predicted 16x8 MV.
801 * @param n the block index
802 * @param mx the x component of the predicted motion vector
803 * @param my the y component of the predicted motion vector
/* 16x8 partitions: the top partition prefers the top neighbour's MV and the
 * bottom partition the left neighbour's (when the reference matches);
 * otherwise falls back to the generic median predictor.  NOTE(review): the
 * partition-selection/return lines are missing from this listing. */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* No directional match: generic median prediction. */
831 pred_motion(h, n, 4, list, ref, mx, my);
835 * gets the directionally predicted 8x16 MV.
836 * @param n the block index
837 * @param mx the x component of the predicted motion vector
838 * @param my the y component of the predicted motion vector
/* 8x16 partitions: the left partition prefers the left neighbour's MV and
 * the right partition the diagonal (C) neighbour's (when the reference
 * matches); otherwise falls back to the generic median predictor.
 * NOTE(review): the partition-selection/return lines are missing here. */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
/* No directional match: generic median prediction. */
868 pred_motion(h, n, 2, list, ref, mx, my);
/* P-skip MV prediction: the skip MV is (0,0) when the MB sits at the
 * left/top edge (neighbour unavailable) or when either neighbour uses
 * ref 0 with a zero MV; otherwise the generic median predictor for the
 * whole 16x16 with ref 0 is used.  NOTE(review): the zero-assignment and
 * return lines are missing from this listing. */
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factor for list0 reference i:
 * clip(((tb*tx + 32) >> 6), -1024, 1023) with tx = (16384 + |td|/2) / td,
 * where td/tb are the clipped POC distances (H.264 eq. 8-196..8-202).
 * NOTE(review): the `return 256;` for td==0/long-term refs and the closing
 * braces are missing from this listing. */
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Fills h->dist_scale_factor[] (and, for MBAFF, the per-field
 * dist_scale_factor_field[][] tables) with temporal-direct scale factors
 * for every list0 reference, via get_scale_factor(). */
902 static inline void direct_dist_scale_factor(H264Context * const h){
903 MpegEncContext * const s = &h->s;
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
905 const int poc1 = h->ref_list[1][0].poc;
/* MBAFF: field references live at indices 16.. and come in pairs. */
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Builds map[list][]: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the matching picture in the current
 * slice's reference list, matched by POC (4*frame_num + reference&3).
 * With mbafi set, only the field entries (offset 16) are filled.
 * Unmatched entries stay 0 ("bogus; fills in for missing frames"). */
919 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 int j, old_ref, rfield;
923 int start= mbafi ? 16 : 0;
924 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
925 int interl= mbafi || s->picture_structure != PICT_FRAME;
927 /* bogus; fills in for missing frames */
928 memset(map[list], 0, sizeof(map[list]));
930 for(rfield=0; rfield<2; rfield++){
931 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
932 int poc = ref1->ref_poc[colfield][list][old_ref];
936 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
937 poc= (poc&~3) + rfield + 1;
939 for(j=start; j<end; j++){
940 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
941 int cur_ref= mbafi ? (j-16)^field : j;
942 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 map[list][old_ref] = cur_ref;
952 static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
960 for(list=0; list<2; list++){
961 cur->ref_count[sidx][list] = h->ref_count[list];
962 for(j=0; j<h->ref_count[list]; j++)
963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
966 if(s->picture_structure == PICT_FRAME){
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
971 cur->mbaff= FRAME_MBAFF;
973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
976 for(list=0; list<2; list++){
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
983 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
991 const int is_b8x8 = IS_8X8(*mb_type);
992 unsigned int sub_mb_type;
995 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }else{ // AFR/FR -> AFR/FR
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
1058 if(h->direct_spatial_mv_pred){
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1070 if(refc == PART_NOT_AVAILABLE)
1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1077 if(ref[0] < 0 && ref[1] < 0){
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1082 for(list=0; list<2; list++){
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 mv[list][0] = mv[list][1] = 0;
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
1094 }else if(ref[0] < 0){
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1101 for(i8=0; i8<4; i8++){
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 h->sub_mb_type[i8] = sub_mb_type;
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1114 if(!IS_INTRA(mb_type_col[y8])
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128 }else if(IS_16X16(*mb_type)){
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1133 if(!IS_INTRA(mb_type_col[0])
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1136 && (h->x264_build>33 || !h->x264_build)))){
1138 a= pack16to32(mv[0][0],mv[0][1]);
1140 b= pack16to32(mv[1][0],mv[1][1]);
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 h->sub_mb_type[i8] = sub_mb_type;
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1164 && (h->x264_build>33 || !h->x264_build)))){
1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1166 if(IS_SUB_8X8(sub_mb_type)){
1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 for(i4=0; i4<4; i4++){
1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1187 }else{ /* direct temporal mv pred */
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1208 const int16_t (*l1mv)[2]= l1mv0;
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 h->sub_mb_type[i8] = sub_mb_type;
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1234 int my_col = (mv_col[1]<<y_shift)/2;
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 /* one-to-one mv scaling */
1246 if(IS_16X16(*mb_type)){
1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1250 if(IS_INTRA(mb_type_col[0])){
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1255 const int scale = dist_scale_factor[ref0];
1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
1272 const int16_t (*l1mv)[2]= l1mv0;
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 h->sub_mb_type[i8] = sub_mb_type;
1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col[0])){
1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287 ref0 = map_col_to_list0[0][ref0];
1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1292 scale = dist_scale_factor[ref0];
1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1295 if(IS_SUB_8X8(sub_mb_type)){
1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1315 static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1321 if(!USES_LIST(mb_type, 0))
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324 for(list=0; list<h->list_count; list++){
1326 if(!USES_LIST(mb_type, list))
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1333 if( h->pps.cabac ) {
1334 if(IS_SKIP(mb_type))
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1353 if(IS_8X8(mb_type)){
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1362 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1367 // src[0]&0x80; //forbidden bit
1368 h->nal_ref_idc= src[0]>>5;
1369 h->nal_unit_type= src[0]&0x1F;
1373 for(i=0; i<length; i++)
1374 printf("%2X ", src[i]);
1377 #if HAVE_FAST_UNALIGNED
1378 # if HAVE_FAST_64BIT
1380 for(i=0; i+1<length; i+=9){
1381 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1384 for(i=0; i+1<length; i+=5){
1385 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1388 if(i>0 && !src[i]) i--;
1392 for(i=0; i+1<length; i+=2){
1393 if(src[i]) continue;
1394 if(i>0 && src[i-1]==0) i--;
1396 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1398 /* startcode, so we must be past the end */
1406 if(i>=length-1){ //no escaped 0
1407 *dst_length= length;
1408 *consumed= length+1; //+1 for the header
1412 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1413 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1414 dst= h->rbsp_buffer[bufidx];
1420 //printf("decoding esc\n");
1421 memcpy(dst, src, i);
1424 //remove escapes (very rare 1:2^22)
1426 dst[di++]= src[si++];
1427 dst[di++]= src[si++];
1428 }else if(src[si]==0 && src[si+1]==0){
1429 if(src[si+2]==3){ //escape
1434 }else //next start code
1438 dst[di++]= src[si++];
1441 dst[di++]= src[si++];
1444 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1447 *consumed= si + 1;//+1 for the header
1448 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1452 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1456 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1466 * IDCT transforms the 16 dc values and dequantizes them.
1467 * @param qp quantization parameter
1469 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1472 int temp[16]; //FIXME check if this is a good idea
1473 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1474 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1476 //memset(block, 64, 2*256);
1479 const int offset= y_offset[i];
1480 const int z0= block[offset+stride*0] + block[offset+stride*4];
1481 const int z1= block[offset+stride*0] - block[offset+stride*4];
1482 const int z2= block[offset+stride*1] - block[offset+stride*5];
1483 const int z3= block[offset+stride*1] + block[offset+stride*5];
1492 const int offset= x_offset[i];
1493 const int z0= temp[4*0+i] + temp[4*2+i];
1494 const int z1= temp[4*0+i] - temp[4*2+i];
1495 const int z2= temp[4*1+i] - temp[4*3+i];
1496 const int z3= temp[4*1+i] + temp[4*3+i];
1498 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1499 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1500 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1501 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1507 * DCT transforms the 16 dc values.
1508 * @param qp quantization parameter ??? FIXME
1510 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1511 // const int qmul= dequant_coeff[qp][0];
1513 int temp[16]; //FIXME check if this is a good idea
1514 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1515 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1518 const int offset= y_offset[i];
1519 const int z0= block[offset+stride*0] + block[offset+stride*4];
1520 const int z1= block[offset+stride*0] - block[offset+stride*4];
1521 const int z2= block[offset+stride*1] - block[offset+stride*5];
1522 const int z3= block[offset+stride*1] + block[offset+stride*5];
1531 const int offset= x_offset[i];
1532 const int z0= temp[4*0+i] + temp[4*2+i];
1533 const int z1= temp[4*0+i] - temp[4*2+i];
1534 const int z2= temp[4*1+i] - temp[4*3+i];
1535 const int z3= temp[4*1+i] + temp[4*3+i];
1537 block[stride*0 +offset]= (z0 + z3)>>1;
1538 block[stride*2 +offset]= (z1 + z2)>>1;
1539 block[stride*8 +offset]= (z1 - z2)>>1;
1540 block[stride*10+offset]= (z0 - z3)>>1;
1548 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1549 const int stride= 16*2;
1550 const int xStride= 16;
1553 a= block[stride*0 + xStride*0];
1554 b= block[stride*0 + xStride*1];
1555 c= block[stride*1 + xStride*0];
1556 d= block[stride*1 + xStride*1];
1563 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1564 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1565 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1566 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1570 static void chroma_dc_dct_c(DCTELEM *block){
1571 const int stride= 16*2;
1572 const int xStride= 16;
1575 a= block[stride*0 + xStride*0];
1576 b= block[stride*0 + xStride*1];
1577 c= block[stride*1 + xStride*0];
1578 d= block[stride*1 + xStride*1];
1585 block[stride*0 + xStride*0]= (a+c);
1586 block[stride*0 + xStride*1]= (e+b);
1587 block[stride*1 + xStride*0]= (a-c);
1588 block[stride*1 + xStride*1]= (e-b);
1593 * gets the chroma qp.
1595 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1596 return h->pps.chroma_qp_table[t][qscale];
1599 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1600 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1601 int src_x_offset, int src_y_offset,
1602 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1603 MpegEncContext * const s = &h->s;
1604 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1605 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1606 const int luma_xy= (mx&3) + ((my&3)<<2);
1607 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1608 uint8_t * src_cb, * src_cr;
1609 int extra_width= h->emu_edge_width;
1610 int extra_height= h->emu_edge_height;
1612 const int full_mx= mx>>2;
1613 const int full_my= my>>2;
1614 const int pic_width = 16*s->mb_width;
1615 const int pic_height = 16*s->mb_height >> MB_FIELD;
1617 if(mx&7) extra_width -= 3;
1618 if(my&7) extra_height -= 3;
1620 if( full_mx < 0-extra_width
1621 || full_my < 0-extra_height
1622 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1623 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1624 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1625 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1629 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1631 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1634 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1637 // chroma offset when predicting from a field of opposite parity
1638 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1639 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1641 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1642 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1646 src_cb= s->edge_emu_buffer;
1648 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1651 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1652 src_cr= s->edge_emu_buffer;
1654 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1657 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1658 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1659 int x_offset, int y_offset,
1660 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1661 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1662 int list0, int list1){
1663 MpegEncContext * const s = &h->s;
1664 qpel_mc_func *qpix_op= qpix_put;
1665 h264_chroma_mc_func chroma_op= chroma_put;
1667 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1668 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1669 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1670 x_offset += 8*s->mb_x;
1671 y_offset += 8*(s->mb_y >> MB_FIELD);
1674 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1675 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1676 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1677 qpix_op, chroma_op);
1680 chroma_op= chroma_avg;
1684 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1685 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1686 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1687 qpix_op, chroma_op);
1691 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1692 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1693 int x_offset, int y_offset,
1694 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1695 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1696 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1697 int list0, int list1){
1698 MpegEncContext * const s = &h->s;
1700 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1701 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1702 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1703 x_offset += 8*s->mb_x;
1704 y_offset += 8*(s->mb_y >> MB_FIELD);
1707 /* don't optimize for luma-only case, since B-frames usually
1708 * use implicit weights => chroma too. */
1709 uint8_t *tmp_cb = s->obmc_scratchpad;
1710 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1711 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1712 int refn0 = h->ref_cache[0][ scan8[n] ];
1713 int refn1 = h->ref_cache[1][ scan8[n] ];
1715 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1716 dest_y, dest_cb, dest_cr,
1717 x_offset, y_offset, qpix_put, chroma_put);
1718 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1719 tmp_y, tmp_cb, tmp_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1722 if(h->use_weight == 2){
1723 int weight0 = h->implicit_weight[refn0][refn1];
1724 int weight1 = 64 - weight0;
1725 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1726 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1727 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1729 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1730 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1731 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1732 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1733 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1734 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1735 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1737 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1740 int list = list1 ? 1 : 0;
1741 int refn = h->ref_cache[list][ scan8[n] ];
1742 Picture *ref= &h->ref_list[list][refn];
1743 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1744 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1745 qpix_put, chroma_put);
1747 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1748 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1749 if(h->use_weight_chroma){
1750 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1751 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1752 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1753 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1758 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1759 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1760 int x_offset, int y_offset,
1761 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1762 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1763 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1764 int list0, int list1){
1765 if((h->use_weight==2 && list0 && list1
1766 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1767 || h->use_weight==1)
1768 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1769 x_offset, y_offset, qpix_put, chroma_put,
1770 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1772 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1773 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1776 static inline void prefetch_motion(H264Context *h, int list){
1777 /* fetch pixels for estimated mv 4 macroblocks ahead
1778 * optimized for 64byte cache lines */
1779 MpegEncContext * const s = &h->s;
1780 const int refn = h->ref_cache[list][scan8[0]];
1782 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1783 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1784 uint8_t **src= h->ref_list[list][refn].data;
1785 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1786 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1787 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1788 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1792 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1793 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1794 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1795 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1796 MpegEncContext * const s = &h->s;
1797 const int mb_xy= h->mb_xy;
1798 const int mb_type= s->current_picture.mb_type[mb_xy];
1800 assert(IS_INTER(mb_type));
1802 prefetch_motion(h, 0);
1804 if(IS_16X16(mb_type)){
1805 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1806 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1807 &weight_op[0], &weight_avg[0],
1808 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1809 }else if(IS_16X8(mb_type)){
1810 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1811 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1812 &weight_op[1], &weight_avg[1],
1813 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1814 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1815 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1816 &weight_op[1], &weight_avg[1],
1817 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1818 }else if(IS_8X16(mb_type)){
1819 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1820 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1821 &weight_op[2], &weight_avg[2],
1822 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1823 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1824 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1825 &weight_op[2], &weight_avg[2],
1826 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 assert(IS_8X8(mb_type));
1833 const int sub_mb_type= h->sub_mb_type[i];
1835 int x_offset= (i&1)<<2;
1836 int y_offset= (i&2)<<1;
1838 if(IS_SUB_8X8(sub_mb_type)){
1839 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1840 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1841 &weight_op[3], &weight_avg[3],
1842 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1843 }else if(IS_SUB_8X4(sub_mb_type)){
1844 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1845 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1846 &weight_op[4], &weight_avg[4],
1847 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1848 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1849 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1850 &weight_op[4], &weight_avg[4],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1852 }else if(IS_SUB_4X8(sub_mb_type)){
1853 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1854 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1855 &weight_op[5], &weight_avg[5],
1856 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1858 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1859 &weight_op[5], &weight_avg[5],
1860 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 assert(IS_SUB_4X4(sub_mb_type));
1865 int sub_x_offset= x_offset + 2*(j&1);
1866 int sub_y_offset= y_offset + (j&2);
1867 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1868 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1869 &weight_op[6], &weight_avg[6],
1870 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1876 prefetch_motion(h, 1);
1879 static av_cold void init_cavlc_level_tab(void){
1880 int suffix_length, mask;
1883 for(suffix_length=0; suffix_length<7; suffix_length++){
1884 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1885 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1886 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1888 mask= -(level_code&1);
1889 level_code= (((2+level_code)>>1) ^ mask) - mask;
1890 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1891 cavlc_level_tab[suffix_length][i][0]= level_code;
1892 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1893 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1897 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1898 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run and their chroma-DC variants),
 * using the pre-sized static table storage declared at file scope
 * (INIT_VLC_USE_NEW_STATIC), then builds the level table. */
1904 static av_cold void decode_init_vlc(void){
1905     static int done = 0;
/* chroma DC coeff_token: 4*5 symbols into a fixed 256-entry table */
1912         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1913         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1914         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1915                  &chroma_dc_coeff_token_len [0], 1, 1,
1916                  &chroma_dc_coeff_token_bits[0], 1, 1,
1917                  INIT_VLC_USE_NEW_STATIC);
/* the four luma coeff_token tables are packed back to back in one array;
 * "offset" walks through the packed storage */
1921             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1922             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1923             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1924                      &coeff_token_len [i][0], 1, 1,
1925                      &coeff_token_bits[i][0], 1, 1,
1926                      INIT_VLC_USE_NEW_STATIC);
1927             offset += coeff_token_vlc_tables_size[i];
1930          * This is a one time safety check to make sure that
1931          * the packed static coeff_token_vlc table sizes
1932          * were initialized correctly.
1934         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* chroma DC total_zeros: one table per (total_coeff-1), 3 variants */
1937             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1938             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1939             init_vlc(&chroma_dc_total_zeros_vlc[i],
1940                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1941                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1942                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1943                      INIT_VLC_USE_NEW_STATIC);
/* luma total_zeros: 15 tables, one per possible total_coeff count */
1945         for(i=0; i<15; i++){
1946             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1947             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1948             init_vlc(&total_zeros_vlc[i],
1949                      TOTAL_ZEROS_VLC_BITS, 16,
1950                      &total_zeros_len [i][0], 1, 1,
1951                      &total_zeros_bits[i][0], 1, 1,
1952                      INIT_VLC_USE_NEW_STATIC);
/* run_before tables for zeros_left 1..6 (loop header elided in this dump) */
1956             run_vlc[i].table = run_vlc_tables[i];
1957             run_vlc[i].table_allocated = run_vlc_tables_size;
1958             init_vlc(&run_vlc[i],
1960                      &run_len [i][0], 1, 1,
1961                      &run_bits[i][0], 1, 1,
1962                      INIT_VLC_USE_NEW_STATIC);
/* separate table for zeros_left > 6 */
1964         run7_vlc.table = run7_vlc_table,
1965         run7_vlc.table_allocated = run7_vlc_table_size;
1966         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1967                  &run_len [6][0], 1, 1,
1968                  &run_bits[6][0], 1, 1,
1969                  INIT_VLC_USE_NEW_STATIC);
1971         init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread scratch buffers of every thread context.
 * av_freep() NULLs the pointers, so a later re-alloc is safe. */
1975 static void free_tables(H264Context *h){
1978     av_freep(&h->intra4x4_pred_mode);
1979     av_freep(&h->chroma_pred_mode_table);
1980     av_freep(&h->cbp_table);
1981     av_freep(&h->mvd_table[0]);
1982     av_freep(&h->mvd_table[1]);
1983     av_freep(&h->direct_table);
1984     av_freep(&h->non_zero_count);
1985     av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
1986     h->slice_table= NULL;
1988     av_freep(&h->mb2b_xy);
1989     av_freep(&h->mb2b8_xy);
/* per-thread buffers allocated by context_init()/frame_start() */
1991     for(i = 0; i < h->s.avctx->thread_count; i++) {
1992         hx = h->thread_context[i];
1994         av_freep(&hx->top_borders[1]);
1995         av_freep(&hx->top_borders[0]);
1996         av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per qp 0..51) for intra (i=0)
 * and inter (i=1) from the PPS scaling matrices. If both matrices are
 * equal the inter table aliases the intra buffer instead of recomputing. */
2000 static void init_dequant8_coeff_table(H264Context *h){
/* the C idct expects a transposed coefficient layout vs. the asm one */
2002     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2003     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2004     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2006     for(i=0; i<2; i++ ){
2007         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
/* identical matrices: share table [0], skip recomputation */
2008             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2012         for(q=0; q<52; q++){
2013             int shift = div6[q];
/* (x>>3)|((x&7)<<3) transposes the 8x8 index when needed */
2016                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2017                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2018                     h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables for all six scaling-matrix slots
 * (intra/inter Y/Cb/Cr) across qp 0..51. Slots whose PPS scaling matrix
 * matches an earlier slot alias that slot's buffer instead of recomputing. */
2023 static void init_dequant4_coeff_table(H264Context *h){
/* the C idct expects a transposed coefficient layout vs. the asm one */
2025     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2026     for(i=0; i<6; i++ ){
2027         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* (inner loop over earlier slots j elided in this dump) */
2029             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2030                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2037         for(q=0; q<52; q++){
2038             int shift = div6[q] + 2;
/* (x>>2)|((x<<2)&0xF) transposes the 4x4 index when needed */
2041                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2042                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2043                     h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass the qp==0 entries are forced to the identity scale
 * (1<<6 compensates the tables' fixed-point shift). */
2048 static void init_dequant_tables(H264Context *h){
2050     init_dequant4_coeff_table(h);
2051     if(h->pps.transform_8x8_mode)
2052         init_dequant8_coeff_table(h);
2053     if(h->sps.transform_bypass){
2056                 h->dequant4_coeff[i][0][x] = 1<<6;
2057         if(h->pps.transform_8x8_mode)
2060                     h->dequant8_coeff[i][0][x] = 1<<6;
2067  * needs width/height
 * Allocate all per-stream tables shared by every slice thread
 * (prediction-mode, nnz, slice, cbp, mvd, direct tables and the
 * macroblock -> motion-block index maps). Returns 0 on success;
 * on allocation failure CHECKED_ALLOCZ jumps to a cleanup path
 * (not visible in this dump) that returns an error.
2069 static int alloc_tables(H264Context *h){
2070     MpegEncContext * const s = &h->s;
/* +1 row so edge macroblocks can read one row beyond the frame */
2071     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2074     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2076     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2077     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2078     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2080     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2081     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2082     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2083     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so index -stride-1 is valid */
2085     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2086     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2088     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2089     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map mb address -> 4x4 (b) and 8x8 (b8) block array indices */
2090     for(y=0; y<s->mb_height; y++){
2091         for(x=0; x<s->mb_width; x++){
2092             const int mb_xy= x + y*s->mb_stride;
2093             const int b_xy = 4*x + 4*y*h->b_stride;
2094             const int b8_xy= 2*x + 2*y*h->b8_stride;
2096             h->mb2b_xy [mb_xy]= b_xy;
2097             h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated lazily in frame_start() once linesize is known */
2101     s->obmc_scratchpad = NULL;
2103     if(!h->dequant4_coeff[0])
2104         init_dequant_tables(h);
2113  * Mimic alloc_tables(), but for every context thread.
 * Shares (does not copy) the big per-stream tables with the master
 * context; only obmc_scratchpad stays per-thread (NULL here, allocated
 * lazily in frame_start()).
2115 static void clone_tables(H264Context *dst, H264Context *src){
2116     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2117     dst->non_zero_count           = src->non_zero_count;
2118     dst->slice_table              = src->slice_table;
2119     dst->cbp_table                = src->cbp_table;
2120     dst->mb2b_xy                  = src->mb2b_xy;
2121     dst->mb2b8_xy                 = src->mb2b8_xy;
2122     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2123     dst->mvd_table[0]             = src->mvd_table[0];
2124     dst->mvd_table[1]             = src->mvd_table[1];
2125     dst->direct_table             = src->direct_table;
2127     dst->s.obmc_scratchpad = NULL;
2128     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2133  * Allocate buffers which are not shared amongst multiple threads.
 * Allocates the two per-thread top-border line buffers
 * (16 luma + 8 Cb + 8 Cr bytes per macroblock column).
 * Returns 0 on success, -1 on allocation failure.
2135 static int context_init(H264Context *h){
2136     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2137     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2141     return -1; // free_tables will clean up for us
/* One-time context setup shared by all entry points: copy dimensions
 * from the AVCodecContext, init prediction/dsputil function pointers
 * and default (flat, 16) scaling matrices. */
2144 static av_cold void common_init(H264Context *h){
2145     MpegEncContext * const s = &h->s;
2147     s->width = s->avctx->width;
2148     s->height = s->avctx->height;
2149     s->codec_id= s->avctx->codec->id;
2151     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet"; forces dequant table init on first PPS */
2153     h->dequant_coeff_pps= -1;
2154     s->unrestricted_mv=1;
2155     s->decode=1; //FIXME
2157     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* default flat scaling lists (all 16) until SPS/PPS provide real ones */
2159     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2160     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec.init callback: set up the decoder context, pick the output
 * pixel format (SVQ3 / VDPAU / plain YUV420P) and initialize SEI and
 * POC bookkeeping to "nothing seen yet" sentinels. */
2163 static av_cold int decode_init(AVCodecContext *avctx){
2164     H264Context *h= avctx->priv_data;
2165     MpegEncContext * const s = &h->s;
2167     MPV_decode_defaults(s);
2172     s->out_format = FMT_H264;
2173     s->workaround_bugs= avctx->workaround_bugs;
2176 //    s->decode_mb= ff_h263_decode_mb;
2177     s->quarter_sample = 1;
2180     if(avctx->codec_id == CODEC_ID_SVQ3)
2181         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2182     else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2183         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2185         avctx->pix_fmt= PIX_FMT_YUV420P;
/* first extradata byte == 1 signals AVC ("avcC") packaging, i.e.
 * length-prefixed NALs rather than Annex-B start codes */
2189     if(avctx->extradata_size > 0 && avctx->extradata &&
2190        *(char *)avctx->extradata == 1){
2197     h->thread_context[0] = h;
2198     h->outputed_poc = INT_MIN;
/* out-of-range sentinel: real prev_poc_msb values fit in fewer bits */
2199     h->prev_poc_msb= 1<<16;
2200     h->sei_recovery_frame_cnt = -1;
2201     h->sei_dpb_output_delay = 0;
2202     h->sei_cpb_removal_delay = -1;
2203     h->sei_buffering_period_present = 0;
/* Begin decoding a new frame/field: start the MPV frame, reset key_frame
 * (IDR marking is ORed in later), precompute per-block destination
 * offsets for both progressive (block_offset[0..]) and field
 * (block_offset[24..]) addressing, and lazily allocate per-thread
 * scratchpads now that linesize is known. Returns <0 on MPV failure
 * (error return line elided in this dump). */
2207 static int frame_start(H264Context *h){
2208     MpegEncContext * const s = &h->s;
2211     if(MPV_frame_start(s, s->avctx) < 0)
2213     ff_er_frame_start(s);
2215      * MPV_frame_start uses pict_type to derive key_frame.
2216      * This is incorrect for H.264; IDR markings must be used.
2217      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2218      * See decode_nal_units().
2220     s->current_picture_ptr->key_frame= 0;
2222     assert(s->linesize && s->uvlinesize);
/* luma offsets: [0..15] frame addressing, [24+..] field (doubled stride) */
2224     for(i=0; i<16; i++){
2225         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2226         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets, Cb ([16..]) and Cr ([20..]) share the same pattern */
2229         h->block_offset[16+i]=
2230         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2231         h->block_offset[24+16+i]=
2232         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2235     /* can't be in alloc_tables because linesize isn't known there.
2236      * FIXME: redo bipred weight to not require extra buffer? */
2237     for(i = 0; i < s->avctx->thread_count; i++)
2238         if(!h->thread_context[i]->s.obmc_scratchpad)
2239             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2241     /* some macroblocks will be accessed before they're available */
2242     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2243         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2245 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2247     // We mark the current picture as non-reference after allocating it, so
2248     // that if we break out due to an error it can be released automatically
2249     // in the next MPV_frame_start().
2250     // SVQ3 as well as most other codecs have only last/next/current and thus
2251     // get released even with set reference, besides SVQ3 and others do not
2252     // mark frames as reference later "naturally".
2253     if(s->codec_id != CODEC_ID_SVQ3)
2254         s->current_picture_ptr->reference= 0;
/* POCs unknown until the slice header is parsed */
2256     s->current_picture_ptr->field_poc[0]=
2257     s->current_picture_ptr->field_poc[1]= INT_MAX;
2258     assert(s->current_picture_ptr->long_ref==0);
/* After the deblocking filter has run on a macroblock, save its bottom
 * row into top_borders[] and its right column into left_border[] so the
 * next macroblock row/column can use the pre-filter... NOTE(review):
 * actually these are the *filtered* samples backed up for intra
 * prediction of neighbours — confirm against filter_mb call order.
 * Handles both the simple (progressive) path and the MBAFF field-pair
 * layout (different offsets/steps, two saved lines per MB pair). */
2263 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2264     MpegEncContext * const s = &h->s;
2273     src_cb -= uvlinesize;
2274     src_cr -= uvlinesize;
2276     if(!simple && FRAME_MBAFF){
/* MBAFF: bottom MB of a pair stores at offset 1, top at 17 (luma) / 9 (chroma) */
2278             offset = MB_MBAFF ? 1 : 17;
2279             uvoffset= MB_MBAFF ? 1 : 9;
2281                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2282                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2283                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2284                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2285                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2290             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2291             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2292                 h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7  ];
2293                 h->left_border[34+18  ]= h->top_borders[0][s->mb_x][16+8+7];
2299         top_idx = MB_MBAFF ? 0 : 1;
/* step==2 skips every other line when saving a field of an MBAFF pair */
2301     step= MB_MBAFF ? 2 : 1;
2304     // There are two lines saved, the line above the the top macroblock of a pair,
2305     // and the line above the bottom macroblock
2306     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2307     for(i=1; i<17 - skiplast; i++){
2308         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2311     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2312     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2314     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left columns live at fixed offsets 34 (Cb) and 34+18 (Cr) */
2315         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2316         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2317         for(i=1; i<9 - skiplast; i++){
2318             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2319             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2321         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2322         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
 * with the picture edges around the current macroblock, so intra
 * prediction sees pre-deblock neighbours while the filtered picture is
 * kept. deblock_left/top gate which edges are touched; with
 * deblocking_filter==2 the test is "same slice" instead of "inside
 * picture". The XCHG macro body is elided in this dump. */
2326 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2327     MpegEncContext * const s = &h->s;
2338     if(!simple && FRAME_MBAFF){
2340             offset = MB_MBAFF ? 1 : 17;
2341             uvoffset= MB_MBAFF ? 1 : 9;
2345         top_idx = MB_MBAFF ? 0 : 1;
2347     step= MB_MBAFF ? 2 : 1;
2350     if(h->deblocking_filter == 2) {
/* filter only within the slice: compare slice ids instead of positions */
2352         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2353         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2355         deblock_left = (s->mb_x > 0);
2356         deblock_top =  (s->mb_y > !!MB_FIELD);
/* step back to the row/column just outside the macroblock */
2359     src_y  -=   linesize + 1;
2360     src_cb -= uvlinesize + 1;
2361     src_cr -= uvlinesize + 1;
2363 #define XCHG(a,b,t,xchg)\
2370         for(i = !deblock_top; i<16; i++){
2371             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2373         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2377         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2378         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2379         if(s->mb_x+1 < s->mb_width){
/* top-right neighbour of the next MB is needed for its prediction */
2380             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2384     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2386             for(i = !deblock_top; i<8; i++){
2387                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2388                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2390             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2391             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2394             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2395             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction
 * or motion compensation, residual idct-add for luma and chroma, then
 * the in-loop deblocking filter. "simple" (compile-time constant via
 * av_always_inline) strips the uncommon paths: MBAFF/field coding,
 * IPCM, SVQ3, gray-only decoding and lossless transform bypass.
 * Many closing braces/else lines are elided in this dump. */
2400 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2401     MpegEncContext * const s = &h->s;
2402     const int mb_x= s->mb_x;
2403     const int mb_y= s->mb_y;
2404     const int mb_xy= h->mb_xy;
2405     const int mb_type= s->current_picture.mb_type[mb_xy];
2406     uint8_t  *dest_y, *dest_cb, *dest_cr;
2407     int linesize, uvlinesize /*dct_offset*/;
2409     int *block_offset = &h->block_offset[0];
2410     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2411     /* is_h264 should always be true if SVQ3 is disabled. */
2412     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2413     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2414     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pointers for this MB inside the current picture planes */
2416     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2417     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2418     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2420     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2421     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides and use the field block offsets */
2423     if (!simple && MB_FIELD) {
2424         linesize   = h->mb_linesize   = s->linesize * 2;
2425         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2426         block_offset = &h->block_offset[24];
2427         if(mb_y&1){ //FIXME move out of this function?
2428             dest_y -= s->linesize*15;
2429             dest_cb-= s->uvlinesize*7;
2430             dest_cr-= s->uvlinesize*7;
/* MBAFF: rewrite ref_cache entries to field-parity ref indices */
2434             for(list=0; list<h->list_count; list++){
2435                 if(!USES_LIST(mb_type, list))
2437                 if(IS_16X16(mb_type)){
2438                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2439                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2441                     for(i=0; i<16; i+=4){
2442                         int ref = h->ref_cache[list][scan8[i]];
2444                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2450         linesize   = h->mb_linesize   = s->linesize;
2451         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2452 //        dct_offset = s->linesize * 16;
/* IPCM: raw samples were stored in h->mb by the parser; just copy */
2455     if (!simple && IS_INTRA_PCM(mb_type)) {
2456         for (i=0; i<16; i++) {
2457             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2459         for (i=0; i<8; i++) {
2460             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2461             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2464         if(IS_INTRA(mb_type)){
/* give intra prediction the unfiltered neighbour samples */
2465             if(h->deblocking_filter)
2466                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2468             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2469                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2470                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2473             if(IS_INTRA4x4(mb_type)){
2474                 if(simple || !s->encoding){
2475                     if(IS_8x8DCT(mb_type)){
2476                         if(transform_bypass){
2478                             idct_add = s->dsp.add_pixels8;
2480                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2481                             idct_add    = s->dsp.h264_idct8_add;
/* intra 8x8: predict then add residual per 8x8 block */
2483                         for(i=0; i<16; i+=4){
2484                             uint8_t * const ptr= dest_y + block_offset[i];
2485                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2486                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2487                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2489                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2490                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2491                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2493                                     if(nnz == 1 && h->mb[i*16])
2494                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2496                                         idct_add   (ptr, h->mb + i*16, linesize);
2501                     if(transform_bypass){
2503                         idct_add = s->dsp.add_pixels4;
2505                         idct_dc_add = s->dsp.h264_idct_dc_add;
2506                         idct_add    = s->dsp.h264_idct_add;
/* intra 4x4: predict then add residual per 4x4 block */
2508                     for(i=0; i<16; i++){
2509                         uint8_t * const ptr= dest_y + block_offset[i];
2510                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2512                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2513                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2517                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2518                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2519                                 assert(mb_y || linesize <= block_offset[i]);
2520                                 if(!topright_avail){
/* replicate the last available top sample when top-right is missing */
2521                                     tr= ptr[3 - linesize]*0x01010101;
2522                                     topright= (uint8_t*) &tr;
2524                                     topright= ptr + 4 - linesize;
2528                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2529                             nnz = h->non_zero_count_cache[ scan8[i] ];
2532                                     if(nnz == 1 && h->mb[i*16])
2533                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2535                                         idct_add   (ptr, h->mb + i*16, linesize);
2537                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: one full-MB prediction + separate DC transform */
2544                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2546                     if(!transform_bypass)
2547                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2549                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2551             if(h->deblocking_filter)
2552                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter MB: motion compensation fills the destination planes */
2554             hl_motion(h, dest_y, dest_cb, dest_cr,
2555                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2556                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2557                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the luma residual (intra4x4 already added it above) */
2561         if(!IS_INTRA4x4(mb_type)){
2563                 if(IS_INTRA16x16(mb_type)){
2564                     if(transform_bypass){
2565                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2566                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2568                             for(i=0; i<16; i++){
2569                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2570                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2574                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2576                 }else if(h->cbp&15){
2577                     if(transform_bypass){
2578                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2579                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2580                         for(i=0; i<16; i+=di){
2581                             if(h->non_zero_count_cache[ scan8[i] ]){
2582                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2586                         if(IS_8x8DCT(mb_type)){
2587                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2589                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 luma residual path */
2594                 for(i=0; i<16; i++){
2595                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2596                         uint8_t * const ptr= dest_y + block_offset[i];
2597                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual, skipped entirely in gray-only decoding */
2603         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2604             uint8_t *dest[2] = {dest_cb, dest_cr};
2605             if(transform_bypass){
2606                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2607                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2608                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2610                     idct_add = s->dsp.add_pixels4;
2611                     for(i=16; i<16+8; i++){
2612                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2613                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC uses its own 2x2 transform with per-plane qp */
2617                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2618                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2620                         idct_add = s->dsp.h264_idct_add;
2621                         idct_dc_add = s->dsp.h264_idct_dc_add;
2622                         for(i=16; i<16+8; i++){
2623                             if(h->non_zero_count_cache[ scan8[i] ])
2624                                 idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2625                             else if(h->mb[i*16])
2626                                 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2629                     for(i=16; i<16+8; i++){
2630                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2631                             uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2632                             svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2639     if(h->cbp || IS_INTRA(mb_type))
2640         s->dsp.clear_blocks(h->mb);
2642     if(h->deblocking_filter) {
2643         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2644         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2645         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2646         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2647         if (!simple && FRAME_MBAFF) {
2648             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2650             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2656  * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * simple=1 lets the always-inlined worker compile out MBAFF/IPCM/SVQ3/etc.
2658 static void hl_decode_mb_simple(H264Context *h){
2659     hl_decode_mb_internal(h, 1);
2663  * Process a macroblock; this handles edge cases, such as interlacing.
 * av_noinline keeps this cold full-featured copy out of the hot path.
2665 static void av_noinline hl_decode_mb_complex(H264Context *h){
2666     hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the fast (simple) or the
 * full-featured (complex) variant. CONFIG_SMALL builds always take the
 * complex path to avoid duplicating the inlined worker. */
2669 static void hl_decode_mb(H264Context *h){
2670     MpegEncContext * const s = &h->s;
2671     const int mb_xy= h->mb_xy;
2672     const int mb_type= s->current_picture.mb_type[mb_xy];
2673     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2676         hl_decode_mb_complex(h);
2677     else hl_decode_mb_simple(h);
/* Turn a frame Picture into a single-field view in place: double the
 * line strides, shift the data pointers one line down for the bottom
 * field, and set reference/poc to the requested field parity. */
2680 static void pic_as_field(Picture *pic, const int parity){
2682     for (i = 0; i < 4; ++i) {
2683         if (parity == PICT_BOTTOM_FIELD)
2684             pic->data[i] += pic->linesize[i];
2685         pic->reference = parity;
2686         pic->linesize[i] *= 2;
2688     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is a reference of the requested parity,
 * converting it to a field view when parity is a single field and
 * adjusting pic_id by id_add. Returns whether the copy happened.
 * NOTE(review): the copy statement itself is elided in this dump. */
2691 static int split_field_copy(Picture *dest, Picture *src,
2692                             int parity, int id_add){
2693     int match = !!(src->reference & parity);
2697         if(parity != PICT_FRAME){
2698             pic_as_field(dest, parity);
2700             dest->pic_id += id_add;
/* Build part of a default reference list from the candidate array "in",
 * alternating fields of the selected parity (sel) and the opposite
 * parity (sel^3), as the H.264 field reference ordering requires.
 * pic_id becomes the long-term index or the frame_num. Returns the
 * number of entries written (return line elided in this dump). */
2707 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2711     while(i[0]<len || i[1]<len){
/* advance each cursor to the next candidate of its parity */
2712         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2714         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2717             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2718             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2721             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2722             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort src pictures by POC into "sorted": dir=0 picks POCs
 * below "limit" in descending order, dir=1 picks POCs above it in
 * ascending order (the two halves of a B-frame default list).
 * Returns the number of entries added (return line elided in dump). */
2729 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2734     best_poc= dir ? INT_MIN : INT_MAX;
2736     for(i=0; i<len; i++){
2737         const int poc= src[i]->poc;
2738         if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2740             sorted[out_i]= src[i];
/* sentinel unchanged => no candidate left on this side */
2743         if(best_poc == (dir ? INT_MIN : INT_MAX))
2745         limit= sorted[out_i++]->poc - dir;
2751  * fills the default_ref_list.
 * B slices: split short-term refs around the current POC per list
 * direction, append long-term refs, and de-duplicate L0/L1 by swapping
 * the first two L1 entries when the lists start identically.
 * P slices: short-term by frame_num order, then long-term.
2753 static int fill_default_ref_list(H264Context *h){
2754     MpegEncContext * const s = &h->s;
2757     if(h->slice_type_nos==FF_B_TYPE){
2758         Picture *sorted[32];
2763             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2765             cur_poc= s->current_picture_ptr->poc;
2767         for(list= 0; list<2; list++){
/* L0: past refs first (desc), then future (asc); L1 mirrored */
2768             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2769             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2771             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2772             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2775             if(len < h->ref_count[list])
2776                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec requires L1 != L0; swap first two L1 entries if lists match */
2780         if(lens[0] == lens[1] && lens[1] > 1){
2781             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2783                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2786         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2787         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2789         if(len < h->ref_count[0])
2790             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2793     for (i=0; i<h->ref_count[0]; i++) {
2794         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2796     if(h->slice_type_nos==FF_B_TYPE){
2797         for (i=0; i<h->ref_count[1]; i++) {
2798             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2805 static void print_short_term(H264Context *h);
2806 static void print_long_term(H264Context *h);
2809  * Extract structure information about the picture described by pic_num in
2810  * the current decoding context (frame or field). Note that pic_num is
2811  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2812  * @param pic_num picture number for which to extract structure information
2813  * @param structure one of PICT_XXX describing structure of picture
2814  *                  with pic_num (output)
2815  * @return frame number (short term) or long term index of picture
2816  *         described by pic_num
2818 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2819     MpegEncContext * const s = &h->s;
2821     *structure = s->picture_structure;
/* in field mode, even pic_nums address the opposite field (elided
 * condition and the final return are missing from this dump) */
2824             /* opposite field */
2825             *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then for each reordering command move
 * the named short-term (idc 0/1) or long-term (idc 2) picture to the
 * current index, shifting the rest down. Returns 0 on success, -1 on
 * bitstream errors (several return/-1 lines are elided in this dump). */
2832 static int decode_ref_pic_list_reordering(H264Context *h){
2833     MpegEncContext * const s = &h->s;
2834     int list, index, pic_structure;
2836     print_short_term(h);
2839     for(list=0; list<h->list_count; list++){
2840         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2842         if(get_bits1(&s->gb)){
2843             int pred= h->curr_pic_num;
2845             for(index=0; ; index++){
2846                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2847                 unsigned int pic_id;
2849                 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
2851                 if(reordering_of_pic_nums_idc==3)
2854                 if(index >= h->ref_count[list]){
2855                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2859                 if(reordering_of_pic_nums_idc<3){
2860                     if(reordering_of_pic_nums_idc<2){
/* short-term: pic num delta, idc 0 = subtract, idc 1 = add */
2861                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2864                         if(abs_diff_pic_num > h->max_pic_num){
2865                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2869                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2870                         else                                pred+= abs_diff_pic_num;
2871                         pred &= h->max_pic_num - 1;
2873                         frame_num = pic_num_extract(h, pred, &pic_structure);
/* find the short-term ref with this frame_num and field parity */
2875                         for(i= h->short_ref_count-1; i>=0; i--){
2876                             ref = h->short_ref[i];
2877                             assert(ref->reference);
2878                             assert(!ref->long_ref);
2880                                 ref->frame_num == frame_num &&
2881                                 (ref->reference & pic_structure)
2889                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2891                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2894                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2897                         ref = h->long_ref[long_idx];
2898                         assert(!(ref && !ref->reference));
2899                         if(ref && (ref->reference & pic_structure)){
2900                             ref->pic_id= pic_id;
2901                             assert(ref->long_ref);
2909                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2910                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* move the found ref to "index", shifting intermediate entries down */
2912                         for(i=index; i+1<h->ref_count[list]; i++){
2913                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2916                         for(; i > index; i--){
2917                             h->ref_list[list][i]= h->ref_list[list][i-1];
2919                         h->ref_list[list][index]= *ref;
2921                             pic_as_field(&h->ref_list[list][index], pic_structure);
2925                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill any still-empty slots so later code never sees a NULL frame */
2931     for(list=0; list<h->list_count; list++){
2932         for(index= 0; index < h->ref_count[list]; index++){
2933             if(!h->ref_list[list][index].data[0]){
2934                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2935                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF frames, derive per-field reference entries: each frame ref
 * at index i gets a top-field copy at 16+2*i and a bottom-field copy at
 * 16+2*i+1 (halved height via doubled strides, bottom shifted one
 * line), and the weighted-prediction tables are duplicated to match. */
2943 static void fill_mbaff_ref_list(H264Context *h){
2945     for(list=0; list<2; list++){ //FIXME try list_count
2946         for(i=0; i<h->ref_count[list]; i++){
2947             Picture *frame = &h->ref_list[list][i];
2948             Picture *field = &h->ref_list[list][16+2*i];
2951                 field[0].linesize[j] <<= 1;
2952             field[0].reference = PICT_TOP_FIELD;
2953             field[0].poc= field[0].field_poc[0];
2954             field[1] = field[0];
/* bottom field starts one (original) line below the top field */
2956                 field[1].data[j] += frame->linesize[j];
2957             field[1].reference = PICT_BOTTOM_FIELD;
2958             field[1].poc= field[1].field_poc[1];
/* field refs inherit the frame ref's explicit weights/offsets */
2960             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2961             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2963                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2964                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2968     for(j=0; j<h->ref_count[1]; j++){
2969         for(i=0; i<h->ref_count[0]; i++)
2970             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2971         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2972         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, per list (L1 only for B slices).
 * References without a flag get the default weight (1<<log2_denom) and
 * zero offset. Sets h->use_weight / use_weight_chroma accordingly. */
2976 static int pred_weight_table(H264Context *h){
2977     MpegEncContext * const s = &h->s;
2979     int luma_def, chroma_def;
2982     h->use_weight_chroma= 0;
2983     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2984     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2985     luma_def = 1<<h->luma_log2_weight_denom;
2986     chroma_def = 1<<h->chroma_log2_weight_denom;
2988     for(list=0; list<2; list++){
2989         h->luma_weight_flag[list]   = 0;
2990         h->chroma_weight_flag[list] = 0;
2991         for(i=0; i<h->ref_count[list]; i++){
2992             int luma_weight_flag, chroma_weight_flag;
2994             luma_weight_flag= get_bits1(&s->gb);
2995             if(luma_weight_flag){
2996                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2997                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only flag "weighted" if the values differ from the defaults */
2998                 if(   h->luma_weight[list][i] != luma_def
2999                    || h->luma_offset[list][i] != 0) {
3001                     h->luma_weight_flag[list]= 1;
3004                 h->luma_weight[list][i]= luma_def;
3005                 h->luma_offset[list][i]= 0;
3009                 chroma_weight_flag= get_bits1(&s->gb);
3010                 if(chroma_weight_flag){
/* one weight/offset pair per chroma plane (Cb, Cr) */
3013                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3014                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3015                         if(   h->chroma_weight[list][i][j] != chroma_def
3016                            || h->chroma_offset[list][i][j] != 0) {
3017                             h->use_weight_chroma= 1;
3018                             h->chroma_weight_flag[list]= 1;
3024                         h->chroma_weight[list][i][j]= chroma_def;
3025                         h->chroma_offset[list][i][j]= 0;
/* P slices only carry L0 weights */
3030         if(h->slice_type_nos != FF_B_TYPE) break;
3032     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initialize the implicit (POC-distance based) weight table used for
 * B slices with weighted_bipred_idc == 2.
 * Special-cases the single-reference symmetric-POC situation (elided
 * branch sets unweighted prediction), otherwise derives a per-(ref0,ref1)
 * weight from the temporal distance scale factor, clamped to 32 when the
 * scale factor is out of the usable range.
 */
3036 static void implicit_weight_table(H264Context *h){
3037 MpegEncContext * const s = &h->s;
3039 int cur_poc = s->current_picture_ptr->poc;
3041 for (i = 0; i < 2; i++) {
3042 h->luma_weight_flag[i] = 0;
3043 h->chroma_weight_flag[i] = 0;
/* Fast path: one ref each side, equidistant from current POC. */
3046 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3047 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3049 h->use_weight_chroma= 0;
/* use_weight_chroma==2 marks the implicit-weight mode here. */
3054 h->use_weight_chroma= 2;
3055 h->luma_log2_weight_denom= 5;
3056 h->chroma_log2_weight_denom= 5;
3058 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3059 int poc0 = h->ref_list[0][ref0].poc;
3060 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3061 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb clipping and tx follow the spec's DistScaleFactor derivation. */
3062 int td = av_clip(poc1 - poc0, -128, 127);
3064 int tb = av_clip(cur_poc - poc0, -128, 127);
3065 int tx = (16384 + (FFABS(td) >> 1)) / td;
3066 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3067 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3068 h->implicit_weight[ref0][ref1] = 32;
3070 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3072 h->implicit_weight[ref0][ref1] = 32;
3078 * Mark a picture as no longer needed for reference. The refmask
3079 * argument allows unreferencing of individual fields or the whole frame.
3080 * If the picture becomes entirely unreferenced, but is being held for
3081 * display purposes, it is marked as such.
3082 * @param refmask mask of fields to unreference; the mask is bitwise
3083 * anded with the reference marking of pic
3084 * @return non-zero if pic becomes entirely unreferenced (except possibly
3085 * for display purposes) zero if one of the fields remains in
/* See the doxygen comment above: clears the refmask bits from
 * pic->reference; if the picture is still pending output (present in
 * h->delayed_pic) it is kept alive with DELAYED_PIC_REF.
 * NOTE(review): the return paths are elided in this listing. */
3088 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Non-zero after masking => some field is still referenced. */
3090 if (pic->reference &= refmask) {
3093 for(i = 0; h->delayed_pic[i]; i++)
3094 if(pic == h->delayed_pic[i]){
3095 pic->reference=DELAYED_PIC_REF;
3103 * instantaneous decoder refresh.
/**
 * Perform IDR (instantaneous decoder refresh) housekeeping:
 * drop all long-term and short-term references and reset the
 * frame_num prediction state.
 */
3105 static void idr(H264Context *h){
3108 for(i=0; i<16; i++){
3109 remove_long(h, i, 0);
3111 assert(h->long_ref_count==0);
3113 for(i=0; i<h->short_ref_count; i++){
3114 unreference_pic(h, h->short_ref[i], 0);
3115 h->short_ref[i]= NULL;
3117 h->short_ref_count=0;
3118 h->prev_frame_num= 0;
3119 h->prev_frame_num_offset= 0;
3124 /* forget old pics after a seek */
/* forget old pics after a seek */
3125 static void flush_dpb(AVCodecContext *avctx){
3126 H264Context *h= avctx->priv_data;
/* Drop every delayed-output picture so nothing stale is emitted. */
3128 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3129 if(h->delayed_pic[i])
3130 h->delayed_pic[i]->reference= 0;
3131 h->delayed_pic[i]= NULL;
3133 h->outputed_poc= INT_MIN;
3135 if(h->s.current_picture_ptr)
3136 h->s.current_picture_ptr->reference= 0;
3137 h->s.first_field= 0;
/* Reset SEI-derived timing/recovery state to "not present". */
3138 h->sei_recovery_frame_cnt = -1;
3139 h->sei_dpb_output_delay = 0;
3140 h->sei_cpb_removal_delay = -1;
3141 h->sei_buffering_period_present = 0;
/* Delegate generic MPEG picture-buffer flushing. */
3142 ff_mpeg_flush(avctx);
3146 * Find a Picture in the short term reference list by frame number.
3147 * @param frame_num frame number to search for
3148 * @param idx the index into h->short_ref where returned picture is found
3149 * undefined if no picture found.
3150 * @return pointer to the found picture, or NULL if no pic with the provided
3151 * frame number is found
/* See the doxygen comment above: linear search of h->short_ref by
 * frame_num; *idx receives the list index when found.
 * NOTE(review): the found/not-found return statements are elided here. */
3153 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3154 MpegEncContext * const s = &h->s;
3157 for(i=0; i<h->short_ref_count; i++){
3158 Picture *pic= h->short_ref[i];
3159 if(s->avctx->debug&FF_DEBUG_MMCO)
3160 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3161 if(pic->frame_num == frame_num) {
3170 * Remove a picture from the short term reference list by its index in
3171 * that list. This does no checking on the provided index; it is assumed
3172 * to be valid. Other list entries are shifted down.
3173 * @param i index into h->short_ref of picture to remove.
3175 static void remove_short_at_index(H264Context *h, int i){
3176 assert(i >= 0 && i < h->short_ref_count);
3177 h->short_ref[i]= NULL;
/* Shift the remaining entries down to close the gap (see comment above). */
3178 if (--h->short_ref_count)
3179 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3184 * @return the removed picture or NULL if an error occurs
/**
 * Remove a picture from the short term reference list by frame number.
 * Only removed from the list if unreference_pic() reports it fully
 * unreferenced under ref_mask. Returns the picture found (or NULL) —
 * return statement elided in this listing.
 */
3186 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3187 MpegEncContext * const s = &h->s;
3191 if(s->avctx->debug&FF_DEBUG_MMCO)
3192 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3194 pic = find_short(h, frame_num, &i);
3196 if(unreference_pic(h, pic, ref_mask))
3197 remove_short_at_index(h, i);
3204 * Remove a picture from the long term reference list by its index in
3206 * @return the removed picture or NULL if an error occurs
/* See the doxygen fragment above: remove long_ref[i] if it becomes fully
 * unreferenced under ref_mask; clears its long_ref flag and decrements
 * the count. Return value elided in this listing. */
3208 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3211 pic= h->long_ref[i];
3213 if(unreference_pic(h, pic, ref_mask)){
3214 assert(h->long_ref[i]->long_ref == 1);
3215 h->long_ref[i]->long_ref= 0;
3216 h->long_ref[i]= NULL;
3217 h->long_ref_count--;
3225 * print short term list
/* Debug helper: dump the short-term reference list (only when the
 * FF_DEBUG_MMCO debug flag is set). */
3227 static void print_short_term(H264Context *h) {
3229 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3230 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3231 for(i=0; i<h->short_ref_count; i++){
3232 Picture *pic= h->short_ref[i];
3233 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3239 * print long term list
/* Debug helper: dump all 16 long-term reference slots (only when the
 * FF_DEBUG_MMCO debug flag is set; NULL-slot check elided here). */
3241 static void print_long_term(H264Context *h) {
3243 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3244 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3245 for(i = 0; i < 16; i++){
3246 Picture *pic= h->long_ref[i];
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3255 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the decoded MMCO (memory management control operation) list
 * against the short/long-term reference lists (spec clause 8.2.5).
 * With mmco_count==0 the sliding-window path (elided in this listing)
 * applies. Afterwards the current picture is inserted as a reference if
 * no MMCO already assigned it, and DPB overruns from corrupt streams are
 * repaired by discarding a reference.
 * NOTE(review): many interior lines (breaks, else branches, the sliding
 * window, return) are elided; comments describe only visible code.
 */
3257 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3258 MpegEncContext * const s = &h->s;
3260 int current_ref_assigned=0;
3263 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3264 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3266 for(i=0; i<mmco_count; i++){
3267 int structure, frame_num;
3268 if(s->avctx->debug&FF_DEBUG_MMCO)
3269 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* Resolve the short-term target once for both SHORT2* opcodes. */
3271 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3272 || mmco[i].opcode == MMCO_SHORT2LONG){
3273 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3274 pic = find_short(h, frame_num, &j);
/* Missing short ref is only OK if it was already moved to the
 * requested long-term slot by a previous field's marking. */
3276 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3277 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3278 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3283 switch(mmco[i].opcode){
3284 case MMCO_SHORT2UNUSED:
3285 if(s->avctx->debug&FF_DEBUG_MMCO)
3286 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3287 remove_short(h, frame_num, structure ^ PICT_FRAME);
3289 case MMCO_SHORT2LONG:
/* Evict whatever currently occupies the target long-term slot. */
3290 if (h->long_ref[mmco[i].long_arg] != pic)
3291 remove_long(h, mmco[i].long_arg, 0);
3293 remove_short_at_index(h, j);
3294 h->long_ref[ mmco[i].long_arg ]= pic;
3295 if (h->long_ref[ mmco[i].long_arg ]){
3296 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3297 h->long_ref_count++;
3300 case MMCO_LONG2UNUSED:
3301 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3302 pic = h->long_ref[j];
3304 remove_long(h, j, structure ^ PICT_FRAME);
3305 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3306 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3309 // Comment below left from previous code as it is an interesting note.
3310 /* First field in pair is in short term list or
3311 * at a different long term index.
3312 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3313 * Report the problem and keep the pair where it is,
3314 * and mark this field valid.
/* MMCO_LONG: mark the current picture long-term at long_arg. */
3317 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3318 remove_long(h, mmco[i].long_arg, 0);
3320 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3321 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3322 h->long_ref_count++;
3325 s->current_picture_ptr->reference |= s->picture_structure;
3326 current_ref_assigned=1;
3328 case MMCO_SET_MAX_LONG:
3329 assert(mmco[i].long_arg <= 16);
3330 // just remove the long term which index is greater than new max
3331 for(j = mmco[i].long_arg; j<16; j++){
3332 remove_long(h, j, 0);
/* MMCO_RESET (elided case label): drop everything and reset POC. */
3336 while(h->short_ref_count){
3337 remove_short(h, h->short_ref[0]->frame_num, 0);
3339 for(j = 0; j < 16; j++) {
3340 remove_long(h, j, 0);
3342 s->current_picture_ptr->poc=
3343 s->current_picture_ptr->field_poc[0]=
3344 s->current_picture_ptr->field_poc[1]=
3348 s->current_picture_ptr->frame_num= 0;
3354 if (!current_ref_assigned) {
3355 /* Second field of complementary field pair; the first field of
3356 * which is already referenced. If short referenced, it
3357 * should be first entry in short_ref. If not, it must exist
3358 * in long_ref; trying to put it on the short list here is an
3359 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3361 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3362 /* Just mark the second field valid */
3363 s->current_picture_ptr->reference = PICT_FRAME;
3364 } else if (s->current_picture_ptr->long_ref) {
3365 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3366 "assignment for second field "
3367 "in complementary field pair "
3368 "(first field is long term)\n");
/* Defensive: purge a duplicate frame_num before inserting at the head. */
3370 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3372 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3375 if(h->short_ref_count)
3376 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3378 h->short_ref[0]= s->current_picture_ptr;
3379 h->short_ref_count++;
3380 s->current_picture_ptr->reference |= s->picture_structure;
3384 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3386 /* We have too many reference frames, probably due to corrupted
3387 * stream. Need to discard one frame. Prevents overrun of the
3388 * short_ref and long_ref buffers.
3390 av_log(h->s.avctx, AV_LOG_ERROR,
3391 "number of reference frames exceeds max (probably "
3392 "corrupt input), discarding one\n");
3394 if (h->long_ref_count && !h->short_ref_count) {
3395 for (i = 0; i < 16; ++i)
3400 remove_long(h, i, 0);
/* Otherwise drop the oldest short-term reference. */
3402 pic = h->short_ref[h->short_ref_count - 1];
3403 remove_short(h, pic->frame_num, 0);
3407 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get an implicit marking (broken_link + optional long-term);
 * otherwise either an explicit adaptive MMCO list is read, or (elided
 * else branch) a sliding-window SHORT2UNUSED operation is synthesized
 * when the DPB is full.
 */
3412 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3413 MpegEncContext * const s = &h->s;
3417 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* get_bits1()-1 maps no_output_of_prior_pics_flag to 0/-1 — quirk kept. */
3418 s->broken_link= get_bits1(gb) -1;
3420 h->mmco[0].opcode= MMCO_LONG;
3421 h->mmco[0].long_arg= 0;
3425 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3426 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3427 MMCOOpcode opcode= get_ue_golomb_31(gb);
3429 h->mmco[i].opcode= opcode;
3430 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped. */
3431 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3432 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3433 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3437 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3438 unsigned int long_arg= get_ue_golomb_31(gb);
/* Field pictures may address up to 32 (2 per frame) for LONG2UNUSED. */
3439 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3440 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3443 h->mmco[i].long_arg= long_arg;
3446 if(opcode > (unsigned)MMCO_LONG){
3447 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3450 if(opcode == MMCO_END)
3455 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
/* Sliding window: evict the oldest short-term ref when DPB is full,
 * unless this is the second field of an already-referenced pair. */
3457 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3458 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3459 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3460 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3462 if (FIELD_PICTURE) {
3463 h->mmco[0].short_pic_num *= 2;
3464 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3465 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order counts (field_poc[0/1] and poc) for the
 * current picture, implementing POC types 0, 1 and 2 (spec clause 8.2.1).
 * NOTE(review): several lines are elided (field_poc declaration, the
 * field_poc[0] assignment for type 0, parts of types 1/2, return) —
 * comments describe only the visible code.
 */
3475 static int init_poc(H264Context *h){
3476 MpegEncContext * const s = &h->s;
3477 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3479 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped since the previous picture => bump the offset. */
3481 h->frame_num_offset= h->prev_frame_num_offset;
3482 if(h->frame_num < h->prev_frame_num)
3483 h->frame_num_offset += max_frame_num;
3485 if(h->sps.poc_type==0){
3486 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Detect poc_lsb wrap-around in either direction to derive poc_msb. */
3488 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3489 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3490 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3491 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3493 h->poc_msb = h->prev_poc_msb;
3494 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3496 field_poc[1] = h->poc_msb + h->poc_lsb;
3497 if(s->picture_structure == PICT_FRAME)
3498 field_poc[1] += h->delta_poc_bottom;
3499 }else if(h->sps.poc_type==1){
3500 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3503 if(h->sps.poc_cycle_length != 0)
3504 abs_frame_num = h->frame_num_offset + h->frame_num;
3508 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3511 expected_delta_per_poc_cycle = 0;
3512 for(i=0; i < h->sps.poc_cycle_length; i++)
3513 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3515 if(abs_frame_num > 0){
3516 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3517 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3519 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3520 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3521 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3525 if(h->nal_ref_idc == 0)
3526 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3528 field_poc[0] = expectedpoc + h->delta_poc[0];
3529 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3531 if(s->picture_structure == PICT_FRAME)
3532 field_poc[1] += h->delta_poc[1];
/* POC type 2: derived directly from frame_num (partially elided). */
3534 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Store per-field POCs only for the fields present in this picture. */
3543 if(s->picture_structure != PICT_BOTTOM_FIELD)
3544 s->current_picture_ptr->field_poc[0]= field_poc[0];
3545 if(s->picture_structure != PICT_TOP_FIELD)
3546 s->current_picture_ptr->field_poc[1]= field_poc[1];
3547 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3554 * initialize scan tables
/**
 * Initialize the zigzag/field scan tables, permuted to match the layout
 * expected by the selected IDCT implementation (identity permutation for
 * the C IDCT, transposed otherwise), plus the _q0 variants used when
 * transform_bypass is active.
 */
3556 static void init_scan_tables(H264Context *h){
3557 MpegEncContext * const s = &h->s;
3559 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3560 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3561 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3563 for(i=0; i<16; i++){
/* 4x4 transpose: swap the 2-bit row/column fields of the index. */
3564 #define T(x) (x>>2) | ((x<<2) & 0xF)
3565 h->zigzag_scan[i] = T(zigzag_scan[i]);
3566 h-> field_scan[i] = T( field_scan[i]);
3570 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3571 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3572 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3573 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3574 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3576 for(i=0; i<64; i++){
/* 8x8 transpose: swap the 3-bit row/column fields of the index. */
3577 #define T(x) (x>>3) | ((x&7)<<3)
3578 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3579 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3580 h->field_scan8x8[i] = T(field_scan8x8[i]);
3581 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* For lossless (transform bypass) the un-permuted tables are needed. */
3585 if(h->sps.transform_bypass){ //FIXME same ugly
3586 h->zigzag_scan_q0 = zigzag_scan;
3587 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3588 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3589 h->field_scan_q0 = field_scan;
3590 h->field_scan8x8_q0 = field_scan8x8;
3591 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3593 h->zigzag_scan_q0 = h->zigzag_scan;
3594 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3595 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3596 h->field_scan_q0 = h->field_scan;
3597 h->field_scan8x8_q0 = h->field_scan8x8;
3598 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3603 * Replicates H264 "master" context to thread contexts.
/* Replicates the state a worker thread needs from the "master" H264
 * context: current-picture pointers, POC/frame_num prediction state,
 * reference lists and dequant tables. Shallow copies only — the Picture
 * pointers and tables are shared, not duplicated. */
3605 static void clone_slice(H264Context *dst, H264Context *src)
3607 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3608 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3609 dst->s.current_picture = src->s.current_picture;
3610 dst->s.linesize = src->s.linesize;
3611 dst->s.uvlinesize = src->s.uvlinesize;
3612 dst->s.first_field = src->s.first_field;
3614 dst->prev_poc_msb = src->prev_poc_msb;
3615 dst->prev_poc_lsb = src->prev_poc_lsb;
3616 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3617 dst->prev_frame_num = src->prev_frame_num;
3618 dst->short_ref_count = src->short_ref_count;
3620 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3621 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3622 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3623 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3625 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3626 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3630 * decodes a slice header.
3631 * This will also call MPV_common_init() and frame_start() as needed.
3633 * @param h h264context
3634 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3636 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Decode a slice header (see the doxygen comment above for the contract).
 * Rough structure of the visible code: parse slice type and PPS/SPS ids,
 * (re)initialize the MpegEncContext and thread contexts on dimension
 * change, handle frame_num gaps and field pairing, parse POC syntax,
 * reference counts and list reordering, weight tables, ref pic marking,
 * CABAC/QP/deblocking parameters, then per-slice bookkeeping.
 * NOTE(review): numerous interior lines are elided in this listing;
 * comments below annotate only what is visible.
 */
3638 static int decode_slice_header(H264Context *h, H264Context *h0){
3639 MpegEncContext * const s = &h->s;
3640 MpegEncContext * const s0 = &h0->s;
3641 unsigned int first_mb_in_slice;
3642 unsigned int pps_id;
3643 int num_ref_idx_active_override_flag;
3644 unsigned int slice_type, tmp, i, j;
3645 int default_ref_list_done = 0;
3646 int last_pic_structure;
/* Non-reference slices may be dropped / decoded with cheaper qpel. */
3648 s->dropable= h->nal_ref_idc == 0;
3650 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3651 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3652 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3654 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3655 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3658 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first_mb==0 starts a new picture in CHUNKS mode. */
3660 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3661 h0->current_slice = 0;
3662 if (!s0->first_field)
3663 s->current_picture_ptr= NULL;
3666 slice_type= get_ue_golomb_31(&s->gb);
3668 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed for the whole picture" (elided branch). */
3673 h->slice_type_fixed=1;
3675 h->slice_type_fixed=0;
3677 slice_type= golomb_to_pict_type[ slice_type ];
3678 if (slice_type == FF_I_TYPE
3679 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3680 default_ref_list_done = 1;
3682 h->slice_type= slice_type;
3683 h->slice_type_nos= slice_type & 3;
3685 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3686 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3687 av_log(h->s.avctx, AV_LOG_ERROR,
3688 "B picture before any references, skipping\n");
/* Resolve PPS and its SPS; copied by value into h. */
3692 pps_id= get_ue_golomb(&s->gb);
3693 if(pps_id>=MAX_PPS_COUNT){
3694 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3697 if(!h0->pps_buffers[pps_id]) {
3698 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3701 h->pps= *h0->pps_buffers[pps_id];
3703 if(!h0->sps_buffers[h->pps.sps_id]) {
3704 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3707 h->sps = *h0->sps_buffers[h->pps.sps_id];
3709 if(h == h0 && h->dequant_coeff_pps != pps_id){
3710 h->dequant_coeff_pps = pps_id;
3711 init_dequant_tables(h);
/* Derive picture dimensions (cropping limited to what 16x16 MBs allow). */
3714 s->mb_width= h->sps.mb_width;
3715 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3717 h->b_stride= s->mb_width*4;
3718 h->b8_stride= s->mb_width*2;
3720 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3721 if(h->sps.frame_mbs_only_flag)
3722 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3724 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3726 if (s->context_initialized
3727 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3729 return -1; // width / height changed during parallelized decoding
3731 flush_dpb(s->avctx);
3734 if (!s->context_initialized) {
3736 return -1; // we can't (re-)initialize context during parallel decoding
3737 if (MPV_common_init(s) < 0)
3741 init_scan_tables(h);
/* Set up one H264Context per extra decoding thread. */
3744 for(i = 1; i < s->avctx->thread_count; i++) {
3746 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3747 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3748 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3751 init_scan_tables(c);
3755 for(i = 0; i < s->avctx->thread_count; i++)
3756 if(context_init(h->thread_context[i]) < 0)
3759 s->avctx->width = s->width;
3760 s->avctx->height = s->height;
3761 s->avctx->sample_aspect_ratio= h->sps.sar;
3762 if(!s->avctx->sample_aspect_ratio.den)
3763 s->avctx->sample_aspect_ratio.den = 1;
3765 if(h->sps.timing_info_present_flag){
3766 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Old x264 builds (< 44) wrote timing info off by a factor of 2. */
3767 if(h->x264_build > 0 && h->x264_build < 44)
3768 s->avctx->time_base.den *= 2;
3769 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3770 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3774 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* Picture structure: frame, field, or MBAFF frame. */
3777 h->mb_aff_frame = 0;
3778 last_pic_structure = s0->picture_structure;
3779 if(h->sps.frame_mbs_only_flag){
3780 s->picture_structure= PICT_FRAME;
3782 if(get_bits1(&s->gb)) { //field_pic_flag
3783 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3785 s->picture_structure= PICT_FRAME;
3786 h->mb_aff_frame = h->sps.mb_aff;
3789 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3791 if(h0->current_slice == 0){
/* Frame_num gap: synthesize the skipped reference frames so later
 * pictures can still be predicted (elided body starts frames). */
3792 while(h->frame_num != h->prev_frame_num &&
3793 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3794 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3796 h->prev_frame_num++;
3797 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3798 s->current_picture_ptr->frame_num= h->prev_frame_num;
3799 execute_ref_pic_marking(h, NULL, 0);
3802 /* See if we have a decoded first field looking for a pair... */
3803 if (s0->first_field) {
3804 assert(s0->current_picture_ptr);
3805 assert(s0->current_picture_ptr->data[0]);
3806 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3808 /* figure out if we have a complementary field pair */
3809 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3811 * Previous field is unmatched. Don't display it, but let it
3812 * remain for reference if marked as such.
3814 s0->current_picture_ptr = NULL;
3815 s0->first_field = FIELD_PICTURE;
3818 if (h->nal_ref_idc &&
3819 s0->current_picture_ptr->reference &&
3820 s0->current_picture_ptr->frame_num != h->frame_num) {
3822 * This and previous field were reference, but had
3823 * different frame_nums. Consider this field first in
3824 * pair. Throw away previous field except for reference
3827 s0->first_field = 1;
3828 s0->current_picture_ptr = NULL;
3831 /* Second field in complementary pair */
3832 s0->first_field = 0;
3837 /* Frame or first field in a potentially complementary pair */
3838 assert(!s0->current_picture_ptr);
3839 s0->first_field = FIELD_PICTURE;
3842 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3843 s0->first_field = 0;
3850 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
/* Validate and apply first_mb_in_slice. */
3852 assert(s->mb_num == s->mb_width * s->mb_height);
3853 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3854 first_mb_in_slice >= s->mb_num){
3855 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3858 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3859 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3860 if (s->picture_structure == PICT_BOTTOM_FIELD)
3861 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3862 assert(s->mb_y < s->mb_height);
/* CurrPicNum / MaxPicNum per spec (doubled for field coding). */
3864 if(s->picture_structure==PICT_FRAME){
3865 h->curr_pic_num= h->frame_num;
3866 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3868 h->curr_pic_num= 2*h->frame_num + 1;
3869 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3872 if(h->nal_unit_type == NAL_IDR_SLICE){
3873 get_ue_golomb(&s->gb); /* idr_pic_id */
/* POC-related slice-header syntax. */
3876 if(h->sps.poc_type==0){
3877 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3879 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3880 h->delta_poc_bottom= get_se_golomb(&s->gb);
3884 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3885 h->delta_poc[0]= get_se_golomb(&s->gb);
3887 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3888 h->delta_poc[1]= get_se_golomb(&s->gb);
3893 if(h->pps.redundant_pic_cnt_present){
3894 h->redundant_pic_count= get_ue_golomb(&s->gb);
3897 //set defaults, might be overridden a few lines later
3898 h->ref_count[0]= h->pps.ref_count[0];
3899 h->ref_count[1]= h->pps.ref_count[1];
3901 if(h->slice_type_nos != FF_I_TYPE){
3902 if(h->slice_type_nos == FF_B_TYPE){
3903 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3905 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3907 if(num_ref_idx_active_override_flag){
3908 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3909 if(h->slice_type_nos==FF_B_TYPE)
3910 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned trick also rejects ref_count == 0 (wraps to huge value). */
3912 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3913 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3914 h->ref_count[0]= h->ref_count[1]= 1;
3918 if(h->slice_type_nos == FF_B_TYPE)
3925 if(!default_ref_list_done){
3926 fill_default_ref_list(h);
3929 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
/* Mirror ref_list[0/1][0] into last/next picture for legacy MPV code. */
3932 if(h->slice_type_nos!=FF_I_TYPE){
3933 s->last_picture_ptr= &h->ref_list[0][0];
3934 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3936 if(h->slice_type_nos==FF_B_TYPE){
3937 s->next_picture_ptr= &h->ref_list[1][0];
3938 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3941 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3942 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3943 pred_weight_table(h);
3944 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3945 implicit_weight_table(h);
3948 for (i = 0; i < 2; i++) {
3949 h->luma_weight_flag[i] = 0;
3950 h->chroma_weight_flag[i] = 0;
/* Ref pic marking operates on the master context h0. */
3955 decode_ref_pic_marking(h0, &s->gb);
3958 fill_mbaff_ref_list(h);
3960 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3961 direct_dist_scale_factor(h);
3962 direct_ref_list_init(h);
3964 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3965 tmp = get_ue_golomb_31(&s->gb);
3967 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3970 h->cabac_init_idc= tmp;
3973 h->last_qscale_diff = 0;
3974 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3976 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3980 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3981 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3982 //FIXME qscale / qp ... stuff
3983 if(h->slice_type == FF_SP_TYPE){
3984 get_bits1(&s->gb); /* sp_for_switch_flag */
3986 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3987 get_se_golomb(&s->gb); /* slice_qs_delta */
/* Deblocking filter parameters (disable_deblocking_filter_idc etc.). */
3990 h->deblocking_filter = 1;
3991 h->slice_alpha_c0_offset = 0;
3992 h->slice_beta_offset = 0;
3993 if( h->pps.deblocking_filter_parameters_present ) {
3994 tmp= get_ue_golomb_31(&s->gb);
3996 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3999 h->deblocking_filter= tmp;
4000 if(h->deblocking_filter < 2)
4001 h->deblocking_filter^= 1; // 1<->0
4003 if( h->deblocking_filter ) {
4004 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4005 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4009 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4010 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4011 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4012 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4013 h->deblocking_filter= 0;
/* Cross-slice deblocking cannot be parallelized across contexts. */
4015 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4016 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4017 /* Cheat slightly for speed:
4018 Do not bother to deblock across slices. */
4019 h->deblocking_filter = 2;
4021 h0->max_contexts = 1;
4022 if(!h0->single_decode_warning) {
4023 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4024 h0->single_decode_warning = 1;
4027 return 1; // deblocking switched inside frame
4032 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4033 slice_group_change_cycle= get_bits(&s->gb, ?);
4036 h0->last_slice_type = slice_type;
4037 h->slice_num = ++h0->current_slice;
4038 if(h->slice_num >= MAX_SLICES){
4039 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Build the per-slice ref->frame mapping (packed frame_num + ref bits). */
4043 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4047 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4048 +(h->ref_list[j][i].reference&3);
4051 for(i=16; i<48; i++)
4052 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4053 +(h->ref_list[j][i].reference&3);
4056 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4057 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4059 s->avctx->refs= h->sps.ref_frame_count;
4061 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4062 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4064 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4066 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4067 pps_id, h->frame_num,
4068 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4069 h->ref_count[0], h->ref_count[1],
4071 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4073 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4074 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read a CAVLC level_prefix: count leading zero bits before the first 1
 * in the bit cache (log is derived from av_log2 of the cached word).
 * Return statement elided in this listing. */
4084 static inline int get_level_prefix(GetBitContext *gb){
4088 OPEN_READER(re, gb);
4089 UPDATE_CACHE(re, gb);
4090 buf=GET_CACHE(re, gb);
4092 log= 32 - av_log2(buf);
4094 print_bin(buf>>(32-log), log);
4095 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4098 LAST_SKIP_BITS(re, gb, log);
4099 CLOSE_READER(re, gb);
4104 static inline int get_dct8x8_allowed(H264Context *h){
// Returns nonzero iff all four sub-macroblock partitions permit an 8x8
// transform. The four 16-bit sub_mb_type entries are tested at once through
// a uint64_t load, with each forbidden flag replicated into all four lanes
// by the 0x0001000100010001 multiplier.
4105 if(h->sps.direct_8x8_inference_flag)
// With direct_8x8_inference, DIRECT2 partitions do not forbid 8x8 DCT.
4106 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
// NOTE(review): the original 'else' line is elided in this excerpt.
4108 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4112 * decodes a residual block.
4113 * @param n block index
4114 * @param scantable scantable
4115 * @param max_coeff number of coefficients in the block
4116 * @return <0 if an error occurred
4118 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
// CAVLC residual parser (H.264 spec section 9.2): reads coeff_token,
// trailing-one signs, levels, total_zeros and run_before, then scatters the
// levels into 'block' along 'scantable' (dequantizing with 'qmul' if given).
// NOTE(review): this excerpt elides lines of the original (else branches,
// error returns, closing braces) -- comments describe only the visible code.
4119 MpegEncContext * const s = &h->s;
4120 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4122 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4124 //FIXME put trailing_onex into the context
// coeff_token: VLC table choice depends on the block type; for non-DC luma
// blocks the table index is predicted from neighbouring non-zero counts.
4126 if(n == CHROMA_DC_BLOCK_INDEX){
4127 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4128 total_coeff= coeff_token>>2;
4130 if(n == LUMA_DC_BLOCK_INDEX){
4131 total_coeff= pred_non_zero_count(h, 0);
4132 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4133 total_coeff= coeff_token>>2;
4135 total_coeff= pred_non_zero_count(h, n);
4136 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4137 total_coeff= coeff_token>>2;
4138 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4142 //FIXME set last_non_zero?
// Bitstream sanity check: more coefficients than the block can hold means
// corrupted input.
4146 if(total_coeff > (unsigned)max_coeff) {
4147 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4151 trailing_ones= coeff_token&3;
4152 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4153 assert(total_coeff<=16);
// Up to 3 trailing +/-1 levels: peek 3 sign bits at once, consume only
// 'trailing_ones' of them; each sign bit maps 0 -> +1, 1 -> -1.
4155 i = show_bits(gb, 3);
4156 skip_bits(gb, trailing_ones);
4157 level[0] = 1-((i&4)>>1);
4158 level[1] = 1-((i&2) );
4159 level[2] = 1-((i&1)<<1);
4161 if(trailing_ones<total_coeff) {
// First non-trailing level: suffix_length starts at 0 or 1 per the spec.
// cavlc_level_tab is a fast path; codes >= 100 fall back to explicit
// level_prefix/level_suffix parsing.
4163 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4164 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4165 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4167 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4168 if(level_code >= 100){
4169 prefix= level_code - 100;
4170 if(prefix == LEVEL_TAB_BITS)
4171 prefix += get_level_prefix(gb);
4173 //first coefficient has suffix_length equal to 0 or 1
4174 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4176 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4178 level_code= (prefix<<suffix_length); //part
4179 }else if(prefix==14){
4181 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4183 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape code with a (prefix-3)-bit suffix.
4185 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4186 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4188 level_code += (1<<(prefix-3))-4096;
// If fewer than 3 trailing ones were coded, levels +/-1 are impossible
// here, so the code space is shifted by 2.
4191 if(trailing_ones < 3) level_code += 2;
// Unzigzag the sign: even codes are positive, odd negative.
4194 mask= -(level_code&1);
4195 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4197 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4200 if(level_code + 3U > 6U)
4202 level[trailing_ones]= level_code;
4205 //remaining coefficients have suffix_length > 0
4206 for(i=trailing_ones+1;i<total_coeff;i++) {
// suffix_limit drives the adaptive growth of suffix_length below.
4207 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4208 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4209 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4211 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4212 if(level_code >= 100){
4213 prefix= level_code - 100;
4214 if(prefix == LEVEL_TAB_BITS){
4215 prefix += get_level_prefix(gb);
4218 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4220 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4222 level_code += (1<<(prefix-3))-4096;
4224 mask= -(level_code&1);
4225 level_code= (((2+level_code)>>1) ^ mask) - mask;
4227 level[i]= level_code;
// Bump suffix_length once the magnitude exceeds the current limit.
4229 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// total_zeros is only coded when the block is not completely full.
4234 if(total_coeff == max_coeff)
4237 if(n == CHROMA_DC_BLOCK_INDEX)
4238 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4240 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Scatter levels back-to-front along the scan; two copies of the loop:
// without dequantization (qmul == NULL path) ...
4243 coeff_num = zeros_left + total_coeff - 1;
4244 j = scantable[coeff_num];
4246 block[j] = level[0];
4247 for(i=1;i<total_coeff;i++) {
4250 else if(zeros_left < 7){
4251 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4253 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4255 zeros_left -= run_before;
4256 coeff_num -= 1 + run_before;
4257 j= scantable[ coeff_num ];
// ... and with dequantization: (level * qmul[j] + 32) >> 6.
4262 block[j] = (level[0] * qmul[j] + 32)>>6;
4263 for(i=1;i<total_coeff;i++) {
4266 else if(zeros_left < 7){
4267 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4269 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4271 zeros_left -= run_before;
4272 coeff_num -= 1 + run_before;
4273 j= scantable[ coeff_num ];
4275 block[j]= (level[i] * qmul[j] + 32)>>6;
// Error path: run_before values consumed more zeros than were coded.
4280 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4287 static void predict_field_decoding_flag(H264Context *h){
// Predicts mb_field_decoding_flag for a skipped MBAFF pair: take it from
// the left neighbour if that is in the same slice, otherwise from the top
// neighbour (H.264 spec inference rule for mb_field_decoding_flag).
4288 MpegEncContext * const s = &h->s;
4289 const int mb_xy= h->mb_xy;
4290 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4291 ? s->current_picture.mb_type[mb_xy-1]
4292 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4293 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// NOTE(review): the final ': 0;' arm of this conditional is elided in this
// excerpt of the original file.
4295 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4299 * decodes a P_SKIP or B_SKIP macroblock
4301 static void decode_mb_skip(H264Context *h){
// Builds a skipped macroblock: no residual (non-zero counts cleared),
// motion inferred from neighbours (direct prediction for B, pskip
// prediction for P), then writes type/qscale/slice bookkeeping back.
// NOTE(review): some lines of the original are elided in this excerpt.
4302 MpegEncContext * const s = &h->s;
4303 const int mb_xy= h->mb_xy;
// A skipped MB codes no coefficients at all.
4306 memset(h->non_zero_count[mb_xy], 0, 16);
4307 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4310 mb_type|= MB_TYPE_INTERLACED;
4312 if( h->slice_type_nos == FF_B_TYPE )
4314 // just for fill_caches. pred_direct_motion will set the real mb_type
4315 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4317 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4318 pred_direct_motion(h, &mb_type);
4319 mb_type|= MB_TYPE_SKIP;
// P_SKIP path: single 16x16 list-0 partition with predicted MV.
4324 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4326 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4327 pred_pskip_motion(h, &mx, &my);
4328 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4329 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4332 write_back_motion(h, mb_type);
4333 s->current_picture.mb_type[mb_xy]= mb_type;
4334 s->current_picture.qscale_table[mb_xy]= s->qscale;
4335 h->slice_table[ mb_xy ]= h->slice_num;
// Remembered so the next MB parse knows a skip preceded it (MBAFF flag
// inference depends on this).
4336 h->prev_mb_skipped= 1;
4340 * decodes a macroblock
4341 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4343 static int decode_mb_cavlc(H264Context *h){
// Top-level CAVLC macroblock parser: handles skip runs, mb_type, IPCM,
// intra prediction modes, inter partition refs/MVs, cbp, dquant and all
// residual blocks.
// NOTE(review): this excerpt elides many lines of the original function
// (else branches, error returns, braces) -- comments describe only the
// visible code.
4344 MpegEncContext * const s = &h->s;
4346 int partition_count;
4347 unsigned int mb_type, cbp;
4348 int dct8x8_allowed= h->pps.transform_8x8_mode;
4350 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4352 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4353 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4355 if(h->slice_type_nos != FF_I_TYPE){
4356 if(s->mb_skip_run==-1)
4357 s->mb_skip_run= get_ue_golomb(&s->gb);
4359 if (s->mb_skip_run--) {
// MBAFF: the field flag for a skipped top MB is either read at the end
// of the skip run or inferred from neighbours.
4360 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4361 if(s->mb_skip_run==0)
4362 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4364 predict_field_decoding_flag(h);
4371 if( (s->mb_y&1) == 0 )
4372 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4375 h->prev_mb_skipped= 0;
// --- mb_type: map the ue(v) code through the per-slice-type tables;
// large codes fall through to the intra tables via decode_intra_mb ---
4377 mb_type= get_ue_golomb(&s->gb);
4378 if(h->slice_type_nos == FF_B_TYPE){
4380 partition_count= b_mb_type_info[mb_type].partition_count;
4381 mb_type= b_mb_type_info[mb_type].type;
4384 goto decode_intra_mb;
4386 }else if(h->slice_type_nos == FF_P_TYPE){
4388 partition_count= p_mb_type_info[mb_type].partition_count;
4389 mb_type= p_mb_type_info[mb_type].type;
4392 goto decode_intra_mb;
4395 assert(h->slice_type_nos == FF_I_TYPE);
4396 if(h->slice_type == FF_SI_TYPE && mb_type)
4400 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4404 cbp= i_mb_type_info[mb_type].cbp;
4405 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4406 mb_type= i_mb_type_info[mb_type].type;
4410 mb_type |= MB_TYPE_INTERLACED;
4412 h->slice_table[ mb_xy ]= h->slice_num;
// --- IPCM: byte-aligned raw samples, no prediction or residual ---
4414 if(IS_INTRA_PCM(mb_type)){
4417 // We assume these blocks are very rare so we do not optimize it.
4418 align_get_bits(&s->gb);
4420 // The pixels are stored in the same order as levels in h->mb array.
4421 for(x=0; x < (CHROMA ? 384 : 256); x++){
4422 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4425 // In deblocking, the quantizer is 0
4426 s->current_picture.qscale_table[mb_xy]= 0;
4427 // All coeffs are present
4428 memset(h->non_zero_count[mb_xy], 16, 16);
4430 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field decoding works with doubled ref counts; restored at the end.
4435 h->ref_count[0] <<= 1;
4436 h->ref_count[1] <<= 1;
4439 fill_caches(h, mb_type, 0);
// --- intra prediction mode parsing ---
4442 if(IS_INTRA(mb_type)){
4444 // init_top_left_availability(h);
4445 if(IS_INTRA4x4(mb_type)){
4448 if(dct8x8_allowed && get_bits1(&s->gb)){
4449 mb_type |= MB_TYPE_8x8DCT;
4453 // fill_intra4x4_pred_table(h);
// Per 4x4 (or 8x8) block: one flag selects the predicted mode, else a
// 3-bit remainder encodes one of the 8 other modes.
4454 for(i=0; i<16; i+=di){
4455 int mode= pred_intra_mode(h, i);
4457 if(!get_bits1(&s->gb)){
4458 const int rem_mode= get_bits(&s->gb, 3);
4459 mode = rem_mode + (rem_mode >= mode);
4463 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4465 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4467 write_back_intra_pred_mode(h);
4468 if( check_intra4x4_pred_mode(h) < 0)
4471 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4472 if(h->intra16x16_pred_mode < 0)
4476 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4479 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitioned inter MB: sub_mb_types, then refs, then MVs ---
4481 }else if(partition_count==4){
4482 int i, j, sub_partition_count[4], list, ref[2][4];
4484 if(h->slice_type_nos == FF_B_TYPE){
4486 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4487 if(h->sub_mb_type[i] >=13){
4488 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4491 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4492 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4494 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4495 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4496 pred_direct_motion(h, &mb_type);
// Mark the inner edges unavailable so MV prediction does not read
// direct-predicted values as neighbours.
4497 h->ref_cache[0][scan8[4]] =
4498 h->ref_cache[1][scan8[4]] =
4499 h->ref_cache[0][scan8[12]] =
4500 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4503 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4505 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4506 if(h->sub_mb_type[i] >=4){
4507 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4510 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4511 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 partition; 1-ref lists need no bits, 2-ref
// lists use a single inverted bit, larger lists use ue(v).
4515 for(list=0; list<h->list_count; list++){
4516 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4518 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4519 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4523 }else if(ref_count == 2){
4524 tmp= get_bits1(&s->gb)^1;
4526 tmp= get_ue_golomb_31(&s->gb);
4528 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4541 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vector differences per sub-partition.
4543 for(list=0; list<h->list_count; list++){
4545 if(IS_DIRECT(h->sub_mb_type[i])) {
4546 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4549 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4550 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4552 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4553 const int sub_mb_type= h->sub_mb_type[i];
4554 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4555 for(j=0; j<sub_partition_count[i]; j++){
4557 const int index= 4*i + block_width*j;
4558 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4559 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4560 mx += get_se_golomb(&s->gb);
4561 my += get_se_golomb(&s->gb);
4562 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV over all 4x4 cells covered by the sub-partition shape.
4564 if(IS_SUB_8X8(sub_mb_type)){
4566 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4568 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4569 }else if(IS_SUB_8X4(sub_mb_type)){
4570 mv_cache[ 1 ][0]= mx;
4571 mv_cache[ 1 ][1]= my;
4572 }else if(IS_SUB_4X8(sub_mb_type)){
4573 mv_cache[ 8 ][0]= mx;
4574 mv_cache[ 8 ][1]= my;
4576 mv_cache[ 0 ][0]= mx;
4577 mv_cache[ 0 ][1]= my;
4580 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4586 }else if(IS_DIRECT(mb_type)){
4587 pred_direct_motion(h, &mb_type);
4588 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter MBs: refs then MVs per partition ---
4590 int list, mx, my, i;
4591 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4592 if(IS_16X16(mb_type)){
4593 for(list=0; list<h->list_count; list++){
4595 if(IS_DIR(mb_type, 0, list)){
4596 if(h->ref_count[list]==1){
4598 }else if(h->ref_count[list]==2){
4599 val= get_bits1(&s->gb)^1;
4601 val= get_ue_golomb_31(&s->gb);
4602 if(val >= h->ref_count[list]){
4603 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4608 val= LIST_NOT_USED&0xFF;
4609 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4611 for(list=0; list<h->list_count; list++){
4613 if(IS_DIR(mb_type, 0, list)){
4614 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4615 mx += get_se_golomb(&s->gb);
4616 my += get_se_golomb(&s->gb);
4617 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4619 val= pack16to32(mx,my);
4622 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4625 else if(IS_16X8(mb_type)){
4626 for(list=0; list<h->list_count; list++){
4629 if(IS_DIR(mb_type, i, list)){
4630 if(h->ref_count[list] == 1){
4632 }else if(h->ref_count[list] == 2){
4633 val= get_bits1(&s->gb)^1;
4635 val= get_ue_golomb_31(&s->gb);
4636 if(val >= h->ref_count[list]){
4637 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4642 val= LIST_NOT_USED&0xFF;
4643 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4646 for(list=0; list<h->list_count; list++){
4649 if(IS_DIR(mb_type, i, list)){
4650 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4651 mx += get_se_golomb(&s->gb);
4652 my += get_se_golomb(&s->gb);
4653 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4655 val= pack16to32(mx,my);
4658 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4662 assert(IS_8X16(mb_type));
4663 for(list=0; list<h->list_count; list++){
4666 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4667 if(h->ref_count[list]==1){
4669 }else if(h->ref_count[list]==2){
4670 val= get_bits1(&s->gb)^1;
4672 val= get_ue_golomb_31(&s->gb);
4673 if(val >= h->ref_count[list]){
4674 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4679 val= LIST_NOT_USED&0xFF;
4680 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4683 for(list=0; list<h->list_count; list++){
4686 if(IS_DIR(mb_type, i, list)){
4687 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4688 mx += get_se_golomb(&s->gb);
4689 my += get_se_golomb(&s->gb);
4690 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4692 val= pack16to32(mx,my);
4695 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4701 if(IS_INTER(mb_type))
4702 write_back_motion(h, mb_type);
// --- coded_block_pattern (not coded for Intra16x16, where it is implied
// by the mb_type table) ---
4704 if(!IS_INTRA16x16(mb_type)){
4705 cbp= get_ue_golomb(&s->gb);
4707 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4712 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4713 else cbp= golomb_to_inter_cbp [cbp];
4715 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4716 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag for inter MBs with luma residual.
4721 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4722 if(get_bits1(&s->gb)){
4723 mb_type |= MB_TYPE_8x8DCT;
4724 h->cbp_table[mb_xy]= cbp;
4727 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual data ---
4729 if(cbp || IS_INTRA16x16(mb_type)){
4730 int i8x8, i4x4, chroma_idx;
4732 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4733 const uint8_t *scan, *scan8x8, *dc_scan;
4735 // fill_non_zero_count_cache(h);
// Scan order depends on field/frame coding; *_q0 variants presumably
// handle qscale==0 -- TODO confirm against the scan-table setup code.
4737 if(IS_INTERLACED(mb_type)){
4738 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4739 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4740 dc_scan= luma_dc_field_scan;
4742 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4743 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4744 dc_scan= luma_dc_zigzag_scan;
4747 dquant= get_se_golomb(&s->gb);
4749 if( dquant > 25 || dquant < -26 ){
4750 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4754 s->qscale += dquant;
4755 if(((unsigned)s->qscale) > 51){
4756 if(s->qscale<0) s->qscale+= 52;
4757 else s->qscale-= 52;
4760 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4761 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// Intra16x16: separate DC block plus 15-coefficient AC blocks.
4762 if(IS_INTRA16x16(mb_type)){
4763 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4764 return -1; //FIXME continue if partitioned and other return -1 too
4767 assert((cbp&15) == 0 || (cbp&15) == 15);
4770 for(i8x8=0; i8x8<4; i8x8++){
4771 for(i4x4=0; i4x4<4; i4x4++){
4772 const int index= i4x4 + 4*i8x8;
4773 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4779 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-Intra16x16 luma: one residual per coded 8x8 group, either as four
// interleaved 4x4 scans of an 8x8 transform or four plain 4x4 blocks.
4782 for(i8x8=0; i8x8<4; i8x8++){
4783 if(cbp & (1<<i8x8)){
4784 if(IS_8x8DCT(mb_type)){
4785 DCTELEM *buf = &h->mb[64*i8x8];
4787 for(i4x4=0; i4x4<4; i4x4++){
4788 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4789 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4792 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4793 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4795 for(i4x4=0; i4x4<4; i4x4++){
4796 const int index= i4x4 + 4*i8x8;
4798 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4804 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4805 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2) then chroma AC blocks, gated by cbp bits 4-5.
4811 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4812 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4818 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4819 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4820 for(i4x4=0; i4x4<4; i4x4++){
4821 const int index= 16 + 4*chroma_idx + i4x4;
4822 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4828 uint8_t * const nnz= &h->non_zero_count_cache[0];
4829 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4830 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4833 uint8_t * const nnz= &h->non_zero_count_cache[0];
4834 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4835 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4836 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4838 s->current_picture.qscale_table[mb_xy]= s->qscale;
4839 write_back_non_zero_count(h);
// Undo the MBAFF ref_count doubling from above.
4842 h->ref_count[0] >>= 1;
4843 h->ref_count[1] >>= 1;
4849 static int decode_cabac_field_decoding_flag(H264Context *h) {
// CABAC mb_field_decoding_flag: context (0..2) counts how many of the
// left/top neighbouring MB pairs (same slice) are field-coded.
4850 MpegEncContext * const s = &h->s;
4851 const int mb_x = s->mb_x;
// Address of the top MB of the current pair.
4852 const int mb_y = s->mb_y & ~1;
4853 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4854 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4856 unsigned int ctx = 0;
4858 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4861 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
// ctx offset 70 per the H.264 CABAC context table.
4865 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4868 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
// CABAC intra mb_type: returns 0 for I_4x4, 25 for I_PCM, or 1..24 for the
// I_16x16 variants (encoding cbp_luma/cbp_chroma/pred mode in the index).
// 'intra_slice' selects the I-slice context layout vs. the shared one used
// when called from P/B mb_type decoding.
// NOTE(review): some lines of the original are elided in this excerpt.
4869 uint8_t *state= &h->cabac_state[ctx_base];
4873 MpegEncContext * const s = &h->s;
4874 const int mba_xy = h->left_mb_xy[0];
4875 const int mbb_xy = h->top_mb_xy;
// Context increments for each non-I4x4 intra neighbour in the same slice.
4877 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4879 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4881 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4882 return 0; /* I4x4 */
4885 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4886 return 0; /* I4x4 */
4889 if( get_cabac_terminate( &h->cabac ) )
4890 return 25; /* PCM */
4892 mb_type = 1; /* I16x16 */
4893 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4894 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4895 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4896 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4897 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4901 static int decode_cabac_mb_type_b( H264Context *h ) {
// CABAC mb_type for B slices: returns the b_mb_type_info index
// (0 = B_Direct_16x16, 22 = B_8x8, 23+ = intra types).
// NOTE(review): some lines of the original are elided in this excerpt.
4902 MpegEncContext * const s = &h->s;
4904 const int mba_xy = h->left_mb_xy[0];
4905 const int mbb_xy = h->top_mb_xy;
4908 assert(h->slice_type_nos == FF_B_TYPE);
// Context 0..2: count of non-direct B neighbours in the same slice.
4910 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4912 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4915 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4916 return 0; /* B_Direct_16x16 */
4918 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4919 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Otherwise read a 4-bit suffix; 13/14/15 are special escapes.
4922 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4923 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4924 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4925 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4927 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4928 else if( bits == 13 ) {
// Escape to the shared intra mb_type decoder (ctx_base 32, non-I slice).
4929 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4930 } else if( bits == 14 )
4931 return 11; /* B_L1_L0_8x16 */
4932 else if( bits == 15 )
4933 return 22; /* B_8x8 */
4935 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4936 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4939 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
// CABAC mb_skip_flag: context (0..2, +13 offset in B slices) counts
// non-skipped left/top neighbours in the same slice. In MBAFF mode the
// neighbour addresses must be adjusted for frame/field pairing.
// NOTE(review): some lines of the original are elided in this excerpt.
4940 MpegEncContext * const s = &h->s;
4944 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4945 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4948 && h->slice_table[mba_xy] == h->slice_num
4949 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4950 mba_xy += s->mb_stride;
4952 mbb_xy = mb_xy - s->mb_stride;
4954 && h->slice_table[mbb_xy] == h->slice_num
4955 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4956 mbb_xy -= s->mb_stride;
4958 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4960 int mb_xy = h->mb_xy;
// Non-MBAFF: the top neighbour is one MB row up (two rows in field pics).
4962 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4965 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4967 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4970 if( h->slice_type_nos == FF_B_TYPE )
4972 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4975 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
// CABAC intra 4x4 prediction mode: one flag selects the predicted mode;
// otherwise a 3-bit fixed-length code (LSB first) names one of the 8
// remaining modes, skipping over 'pred_mode'.
4978 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4981 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4982 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4983 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4985 if( mode >= pred_mode )
4991 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
// CABAC intra_chroma_pred_mode: truncated unary (max 3) with the first
// bin's context derived from whether the neighbours use a non-DC mode.
4992 const int mba_xy = h->left_mb_xy[0];
4993 const int mbb_xy = h->top_mb_xy;
4997 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4998 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5001 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5004 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5007 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5009 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5015 static int decode_cabac_mb_cbp_luma( H264Context *h) {
// CABAC luma coded_block_pattern: one bin per 8x8 block, each context built
// from the coded flags of the left/top 8x8 neighbours (previously decoded
// bits of this MB, or the neighbour MB's cbp; -1 == unavailable, which
// makes the !() tests evaluate as 'coded').
5016 int cbp_b, cbp_a, ctx, cbp = 0;
5018 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5019 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5021 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5022 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5023 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5024 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5025 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5026 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5027 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5028 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5031 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
// CABAC chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC).
// Contexts derive from the neighbours' chroma cbp stored in bits 4-5 of
// left_cbp/top_cbp.
5035 cbp_a = (h->left_cbp>>4)&0x03;
5036 cbp_b = (h-> top_cbp>>4)&0x03;
5039 if( cbp_a > 0 ) ctx++;
5040 if( cbp_b > 0 ) ctx += 2;
5041 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin decides DC-only vs DC+AC, with its own context pair.
5045 if( cbp_a == 2 ) ctx++;
5046 if( cbp_b == 2 ) ctx += 2;
5047 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5049 static int decode_cabac_mb_dqp( H264Context *h) {
// CABAC mb_qp_delta: unary-coded magnitude with the first-bin context
// depending on whether the previous MB changed QP; the unsigned code is
// mapped back to a signed delta (odd -> positive, even -> negative).
5050 int ctx= h->last_qscale_diff != 0;
5053 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5056 if(val > 102) //prevent infinite loop
5061 return (val + 1)>>1 ;
5063 return -((val + 1)>>1);
5065 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
// CABAC P-slice sub_mb_type: small binary tree over contexts 21..23
// yielding one of the four P sub-partition types.
5066 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5068 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5070 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5074 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
// CABAC B-slice sub_mb_type: binary tree over contexts 36..39 yielding an
// index into b_sub_mb_type_info (0 = B_Direct_8x8).
// NOTE(review): some lines of the original are elided in this excerpt.
5076 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5077 return 0; /* B_Direct_8x8 */
5078 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5079 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5081 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5082 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5083 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5086 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5087 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5091 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
// CABAC transform_size_8x8_flag; context offset is the number of
// neighbouring MBs already using the 8x8 transform.
5092 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5095 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
// CABAC ref_idx: unary code over contexts 54+, first-bin context derived
// from the left/top cached ref indices. In B slices, direct-predicted
// neighbours are treated as ref 0 for context purposes.
// NOTE(review): some lines of the original are elided in this excerpt.
5096 int refa = h->ref_cache[list][scan8[n] - 1];
5097 int refb = h->ref_cache[list][scan8[n] - 8];
5101 if( h->slice_type_nos == FF_B_TYPE) {
5102 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5104 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5113 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity bound to stop runaway unary codes on corrupt input.
5116 if(ref >= 32 /*h->ref_list[list]*/){
5123 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
// CABAC motion vector difference component (l = 0 horizontal, 1 vertical):
// UEG3 binarization -- up to 9 unary context-coded bins, then an
// exp-golomb suffix in bypass mode, then a bypass sign bit.
// NOTE(review): some lines of the original are elided in this excerpt.
5124 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5125 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5126 int ctxbase = (l == 0) ? 40 : 47;
// First-bin context from the neighbours' absolute mvd sum.
5128 int ctx = (amvd>2) + (amvd>32);
5130 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5135 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5143 while( get_cabac_bypass( &h->cabac ) ) {
5147 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5152 if( get_cabac_bypass( &h->cabac ) )
5156 return get_cabac_bypass_sign( &h->cabac, -mvd );
5159 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Context index for the coded_block_flag: combines the left/top blocks'
// non-zero status with the block category ('cat'). DC categories read the
// neighbours' flags out of left_cbp/top_cbp; AC/luma categories read the
// non_zero_count cache.
// NOTE(review): some lines of the original are elided in this excerpt.
5165 nza = h->left_cbp&0x100;
5166 nzb = h-> top_cbp&0x100;
// Chroma DC: per-component flag stored at bits 6+idx of the cbp words.
5168 nza = (h->left_cbp>>(6+idx))&0x01;
5169 nzb = (h-> top_cbp>>(6+idx))&0x01;
5172 assert(cat == 1 || cat == 2 || cat == 4);
5173 nza = h->non_zero_count_cache[scan8[idx] - 1];
5174 nzb = h->non_zero_count_cache[scan8[idx] - 8];
// 4 contexts per category.
5183 return ctx + 4 * cat;
// Maps an 8x8 scan position (0..62) to the context offset used for the
// last_significant_coeff_flag of 8x8 blocks (H.264 spec Table 9-43).
// Exposed via DECLARE_ASM_CONST because the x86 assembly path reads it.
5186 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5187 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5188 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5189 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5190 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5193 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5194 static const int significant_coeff_flag_offset[2][6] = {
5195 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5196 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5198 static const int last_coeff_flag_offset[2][6] = {
5199 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5200 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5202 static const int coeff_abs_level_m1_offset[6] = {
5203 227+0, 227+10, 227+20, 227+30, 227+39, 426
5205 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5206 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5207 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5208 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5209 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5210 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5211 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5212 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5213 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5215 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5216 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5217 * map node ctx => cabac ctx for level=1 */
5218 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5219 /* map node ctx => cabac ctx for level>1 */
5220 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5221 static const uint8_t coeff_abs_level_transition[2][8] = {
5222 /* update node ctx after decoding a level=1 */
5223 { 1, 2, 3, 3, 4, 5, 6, 7 },
5224 /* update node ctx after decoding a level>1 */
5225 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Body of decode_cabac_residual_internal(): decodes one CABAC-coded
 * residual block — coded_block_flag, significance map, then levels and
 * signs — writing dequantized coefficients into block[] and updating
 * the non-zero-count / cbp caches.  NOTE(review): the enclosing
 * signature and several brace/else lines are not visible in this view. */
5231 int coeff_count = 0;
5234 uint8_t *significant_coeff_ctx_base;
5235 uint8_t *last_coeff_ctx_base;
5236 uint8_t *abs_level_m1_ctx_base;
/* Work on a local copy of the CABAC state (better register allocation);
 * it is written back to h->cabac before every return path below. */
5239 #define CABAC_ON_STACK
5241 #ifdef CABAC_ON_STACK
5244 cc.range = h->cabac.range;
5245 cc.low = h->cabac.low;
5246 cc.bytestream= h->cabac.bytestream;
5248 #define CC &h->cabac
5252 /* cat: 0-> DC 16x16 n = 0
5253 * 1-> AC 16x16 n = luma4x4idx
5254 * 2-> Luma4x4 n = luma4x4idx
5255 * 3-> DC Chroma n = iCbCr
5256 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5257 * 5-> Luma8x8 n = 4 * luma8x8idx
5260 /* read coded block flag */
5261 if( is_dc || cat != 5 ) {
5262 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* coded_block_flag == 0: block has no coefficients, restore state and bail */
5264 h->non_zero_count_cache[scan8[n]] = 0;
5266 #ifdef CABAC_ON_STACK
5267 h->cabac.range = cc.range ;
5268 h->cabac.low = cc.low ;
5269 h->cabac.bytestream= cc.bytestream;
/* select the context sets for this block category and frame/field mode */
5275 significant_coeff_ctx_base = h->cabac_state
5276 + significant_coeff_flag_offset[MB_FIELD][cat];
5277 last_coeff_ctx_base = h->cabac_state
5278 + last_coeff_flag_offset[MB_FIELD][cat];
5279 abs_level_m1_ctx_base = h->cabac_state
5280 + coeff_abs_level_m1_offset[cat];
5282 if( !is_dc && cat == 5 ) {
/* Significance map: records scan positions of nonzero coefficients
 * into index[]; last_coeff_flag terminates the scan early. */
5283 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5284 for(last= 0; last < coefs; last++) { \
5285 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5286 if( get_cabac( CC, sig_ctx )) { \
5287 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5288 index[coeff_count++] = last; \
5289 if( get_cabac( CC, last_ctx ) ) { \
5295 if( last == max_coeff -1 ) {\
5296 index[coeff_count++] = last;\
5298 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written x86 asm fast paths when the required registers are free */
5299 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5300 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5302 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5304 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5306 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5309 assert(coeff_count > 0);
/* record nonzero-coefficient presence for later CBP/deblocking decisions */
5313 h->cbp_table[h->mb_xy] |= 0x100;
5315 h->cbp_table[h->mb_xy] |= 0x40 << n;
5318 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5320 assert( cat == 1 || cat == 2 || cat == 4 );
5321 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level/sign decoding: walk the significance list backwards, tracking
 * the node context that selects the abs-level CABAC contexts. */
5326 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5328 int j= scantable[index[--coeff_count]];
5330 if( get_cabac( CC, ctx ) == 0 ) {
5331 node_ctx = coeff_abs_level_transition[0][node_ctx];
/* level == 1; DC blocks (qmul == NULL path) store +-1 directly */
5333 block[j] = get_cabac_bypass_sign( CC, -1);
5335 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5339 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5340 node_ctx = coeff_abs_level_transition[1][node_ctx];
5342 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
/* abs level >= 15: escape to bypass-coded exp-golomb suffix */
5346 if( coeff_abs >= 15 ) {
5348 while( get_cabac_bypass( CC ) ) {
5354 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5360 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5362 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5365 } while( coeff_count );
/* write the (possibly stack-local) CABAC state back before returning */
5366 #ifdef CABAC_ON_STACK
5367 h->cabac.range = cc.range ;
5368 h->cabac.low = cc.low ;
5369 h->cabac.bytestream= cc.bytestream;
5375 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5376 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5379 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5380 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5384 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5386 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5388 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5389 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Compute the macroblock addresses of the top and left neighbours of the
 * current macroblock, adjusting for MBAFF macroblock pairs and field
 * pictures.  NOTE(review): the MBAFF guard and several closing braces
 * are not visible in this fragmented view. */
5393 static inline void compute_mb_neighbors(H264Context *h)
5395 MpegEncContext * const s = &h->s;
5396 const int mb_xy = h->mb_xy;
/* frame-coding defaults: neighbour directly above / directly left */
5397 h->top_mb_xy = mb_xy - s->mb_stride;
5398 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are addressed via the macroblock *pair* above/left */
5400 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5401 const int top_pair_xy = pair_xy - s->mb_stride;
5402 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5403 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5404 const int curr_mb_field_flag = MB_FIELD;
5405 const int bottom = (s->mb_y & 1);
5407 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5408 h->top_mb_xy -= s->mb_stride;
/* same-parity left neighbour only when field flags match */
5410 if (!left_mb_field_flag == curr_mb_field_flag) {
5411 h->left_mb_xy[0] = pair_xy - 1;
5413 } else if (FIELD_PICTURE) {
5414 h->top_mb_xy -= s->mb_stride;
5420 * decodes a macroblock
5421 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5423 static int decode_mb_cabac(H264Context *h) {
5424 MpegEncContext * const s = &h->s;
5426 int mb_type, partition_count, cbp = 0;
5427 int dct8x8_allowed= h->pps.transform_8x8_mode;
5429 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5431 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* ---- mb_skip_flag (P/B slices only) ---- */
5432 if( h->slice_type_nos != FF_I_TYPE ) {
5434 /* a skipped mb needs the aff flag from the following mb */
5435 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5436 predict_field_decoding_flag(h);
5437 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5438 skip = h->next_mb_skipped;
5440 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5441 /* read skip flags */
5443 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5444 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5445 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5446 if(!h->next_mb_skipped)
5447 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: clear per-MB decode state and return via the skip path */
5452 h->cbp_table[mb_xy] = 0;
5453 h->chroma_pred_mode_table[mb_xy] = 0;
5454 h->last_qscale_diff = 0;
5461 if( (s->mb_y&1) == 0 )
5463 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5466 h->prev_mb_skipped = 0;
5468 compute_mb_neighbors(h);
/* ---- mb_type decoding, per slice type ---- */
5470 if( h->slice_type_nos == FF_B_TYPE ) {
5471 mb_type = decode_cabac_mb_type_b( h );
5473 partition_count= b_mb_type_info[mb_type].partition_count;
5474 mb_type= b_mb_type_info[mb_type].type;
5477 goto decode_intra_mb;
5479 } else if( h->slice_type_nos == FF_P_TYPE ) {
5480 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5482 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5483 /* P_L0_D16x16, P_8x8 */
5484 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5486 /* P_L0_D8x16, P_L0_D16x8 */
5487 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5489 partition_count= p_mb_type_info[mb_type].partition_count;
5490 mb_type= p_mb_type_info[mb_type].type;
5492 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5493 goto decode_intra_mb;
5496 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5497 if(h->slice_type == FF_SI_TYPE && mb_type)
5499 assert(h->slice_type_nos == FF_I_TYPE);
/* intra MB: look up cbp / prediction mode from the type table */
5501 partition_count = 0;
5502 cbp= i_mb_type_info[mb_type].cbp;
5503 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5504 mb_type= i_mb_type_info[mb_type].type;
5507 mb_type |= MB_TYPE_INTERLACED;
5509 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- PCM macroblock: raw samples follow in the bitstream ---- */
5511 if(IS_INTRA_PCM(mb_type)) {
5514 // We assume these blocks are very rare so we do not optimize it.
5515 // FIXME The two following lines get the bitstream position in the cabac
5516 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5517 ptr= h->cabac.bytestream;
5518 if(h->cabac.low&0x1) ptr--;
5520 if(h->cabac.low&0x1FF) ptr--;
5523 // The pixels are stored in the same order as levels in h->mb array.
5524 memcpy(h->mb, ptr, 256); ptr+=256;
5526 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* CABAC must be re-initialized after the raw PCM bytes */
5529 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5531 // All blocks are present
5532 h->cbp_table[mb_xy] = 0x1ef;
5533 h->chroma_pred_mode_table[mb_xy] = 0;
5534 // In deblocking, the quantizer is 0
5535 s->current_picture.qscale_table[mb_xy]= 0;
5536 // All coeffs are present
5537 memset(h->non_zero_count[mb_xy], 16, 16);
5538 s->current_picture.mb_type[mb_xy]= mb_type;
5539 h->last_qscale_diff = 0;
/* MBAFF: ref counts are doubled while decoding a field MB of a pair */
5544 h->ref_count[0] <<= 1;
5545 h->ref_count[1] <<= 1;
5548 fill_caches(h, mb_type, 0);
/* ---- prediction modes / motion information ---- */
5550 if( IS_INTRA( mb_type ) ) {
5552 if( IS_INTRA4x4( mb_type ) ) {
5553 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5554 mb_type |= MB_TYPE_8x8DCT;
5555 for( i = 0; i < 16; i+=4 ) {
5556 int pred = pred_intra_mode( h, i );
5557 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5558 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5561 for( i = 0; i < 16; i++ ) {
5562 int pred = pred_intra_mode( h, i );
5563 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5565 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5568 write_back_intra_pred_mode(h);
5569 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5571 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5572 if( h->intra16x16_pred_mode < 0 ) return -1;
5575 h->chroma_pred_mode_table[mb_xy] =
5576 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5578 pred_mode= check_intra_pred_mode( h, pred_mode );
5579 if( pred_mode < 0 ) return -1;
5580 h->chroma_pred_mode= pred_mode;
/* ---- 8x8 partitions (sub-macroblock types) ---- */
5582 } else if( partition_count == 4 ) {
5583 int i, j, sub_partition_count[4], list, ref[2][4];
5585 if( h->slice_type_nos == FF_B_TYPE ) {
5586 for( i = 0; i < 4; i++ ) {
5587 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5588 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5589 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5591 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5592 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5593 pred_direct_motion(h, &mb_type);
5594 h->ref_cache[0][scan8[4]] =
5595 h->ref_cache[1][scan8[4]] =
5596 h->ref_cache[0][scan8[12]] =
5597 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5598 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5599 for( i = 0; i < 4; i++ )
5600 if( IS_DIRECT(h->sub_mb_type[i]) )
5601 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5605 for( i = 0; i < 4; i++ ) {
5606 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5607 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5608 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices for each non-direct 8x8 partition */
5612 for( list = 0; list < h->list_count; list++ ) {
5613 for( i = 0; i < 4; i++ ) {
5614 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5615 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5616 if( h->ref_count[list] > 1 ){
5617 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5618 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5619 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5627 h->ref_cache[list][ scan8[4*i]+1 ]=
5628 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5633 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors for each sub-partition */
5635 for(list=0; list<h->list_count; list++){
5637 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5638 if(IS_DIRECT(h->sub_mb_type[i])){
5639 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5643 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5644 const int sub_mb_type= h->sub_mb_type[i];
5645 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5646 for(j=0; j<sub_partition_count[i]; j++){
5649 const int index= 4*i + block_width*j;
5650 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5651 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5652 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5654 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5655 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5656 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the mv/mvd into every 4x4 cell the sub-partition covers */
5658 if(IS_SUB_8X8(sub_mb_type)){
5660 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5662 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5665 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5667 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5668 }else if(IS_SUB_8X4(sub_mb_type)){
5669 mv_cache[ 1 ][0]= mx;
5670 mv_cache[ 1 ][1]= my;
5672 mvd_cache[ 1 ][0]= mx - mpx;
5673 mvd_cache[ 1 ][1]= my - mpy;
5674 }else if(IS_SUB_4X8(sub_mb_type)){
5675 mv_cache[ 8 ][0]= mx;
5676 mv_cache[ 8 ][1]= my;
5678 mvd_cache[ 8 ][0]= mx - mpx;
5679 mvd_cache[ 8 ][1]= my - mpy;
5681 mv_cache[ 0 ][0]= mx;
5682 mv_cache[ 0 ][1]= my;
5684 mvd_cache[ 0 ][0]= mx - mpx;
5685 mvd_cache[ 0 ][1]= my - mpy;
/* list not used for this partition: zero out mv and mvd */
5688 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5689 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5690 p[0] = p[1] = p[8] = p[9] = 0;
5691 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* ---- direct-mode macroblock ---- */
5695 } else if( IS_DIRECT(mb_type) ) {
5696 pred_direct_motion(h, &mb_type);
5697 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5698 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5699 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- 16x16 / 16x8 / 8x16 inter partitions ---- */
5701 int list, mx, my, i, mpx, mpy;
5702 if(IS_16X16(mb_type)){
5703 for(list=0; list<h->list_count; list++){
5704 if(IS_DIR(mb_type, 0, list)){
5706 if(h->ref_count[list] > 1){
5707 ref= decode_cabac_mb_ref(h, list, 0);
5708 if(ref >= (unsigned)h->ref_count[list]){
5709 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5714 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5716 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5718 for(list=0; list<h->list_count; list++){
5719 if(IS_DIR(mb_type, 0, list)){
5720 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5722 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5723 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5724 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5726 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5727 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5729 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5732 else if(IS_16X8(mb_type)){
5733 for(list=0; list<h->list_count; list++){
5735 if(IS_DIR(mb_type, i, list)){
5737 if(h->ref_count[list] > 1){
5738 ref= decode_cabac_mb_ref( h, list, 8*i );
5739 if(ref >= (unsigned)h->ref_count[list]){
5740 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5745 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5747 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5750 for(list=0; list<h->list_count; list++){
5752 if(IS_DIR(mb_type, i, list)){
5753 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5754 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5755 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5756 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5758 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5759 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5761 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5762 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5767 assert(IS_8X16(mb_type));
5768 for(list=0; list<h->list_count; list++){
5770 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5772 if(h->ref_count[list] > 1){
5773 ref= decode_cabac_mb_ref( h, list, 4*i );
5774 if(ref >= (unsigned)h->ref_count[list]){
5775 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5780 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5782 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5785 for(list=0; list<h->list_count; list++){
5787 if(IS_DIR(mb_type, i, list)){
5788 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5789 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5790 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5792 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5793 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5794 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5796 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5797 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5804 if( IS_INTER( mb_type ) ) {
5805 h->chroma_pred_mode_table[mb_xy] = 0;
5806 write_back_motion( h, mb_type );
/* ---- coded block pattern ---- */
5809 if( !IS_INTRA16x16( mb_type ) ) {
5810 cbp = decode_cabac_mb_cbp_luma( h );
5812 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5815 h->cbp_table[mb_xy] = h->cbp = cbp;
5817 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5818 if( decode_cabac_mb_transform_size( h ) )
5819 mb_type |= MB_TYPE_8x8DCT;
5821 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residuals ---- */
5823 if( cbp || IS_INTRA16x16( mb_type ) ) {
5824 const uint8_t *scan, *scan8x8, *dc_scan;
5825 const uint32_t *qmul;
5828 if(IS_INTERLACED(mb_type)){
5829 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5830 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5831 dc_scan= luma_dc_field_scan;
5833 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5834 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5835 dc_scan= luma_dc_zigzag_scan;
5838 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5839 if( dqp == INT_MIN ){
5840 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* mb_qp_delta wraps modulo 52 per the spec */
5844 if(((unsigned)s->qscale) > 51){
5845 if(s->qscale<0) s->qscale+= 52;
5846 else s->qscale-= 52;
5848 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5849 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5851 if( IS_INTRA16x16( mb_type ) ) {
5853 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5854 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5857 qmul = h->dequant4_coeff[0][s->qscale];
5858 for( i = 0; i < 16; i++ ) {
5859 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5860 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5863 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5867 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5868 if( cbp & (1<<i8x8) ) {
5869 if( IS_8x8DCT(mb_type) ) {
5870 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5871 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5873 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5874 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5875 const int index = 4*i8x8 + i4x4;
5876 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5878 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5879 //STOP_TIMER("decode_residual")
5883 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5884 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC blocks */
5891 for( c = 0; c < 2; c++ ) {
5892 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5893 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5899 for( c = 0; c < 2; c++ ) {
5900 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5901 for( i = 0; i < 4; i++ ) {
5902 const int index = 16 + 4 * c + i;
5903 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5904 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5908 uint8_t * const nnz= &h->non_zero_count_cache[0];
5909 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5910 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residuals at all: clear the whole non-zero-count cache */
5913 uint8_t * const nnz= &h->non_zero_count_cache[0];
5914 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5915 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5916 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5917 h->last_qscale_diff = 0;
5920 s->current_picture.qscale_table[mb_xy]= s->qscale;
5921 write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done above */
5924 h->ref_count[0] >>= 1;
5925 h->ref_count[1] >>= 1;
5932 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5933 const int index_a = qp + h->slice_alpha_c0_offset;
5934 const int alpha = (alpha_table+52)[index_a];
5935 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5939 tc[0] = (tc0_table+52)[index_a][bS[0]];
5940 tc[1] = (tc0_table+52)[index_a][bS[1]];
5941 tc[2] = (tc0_table+52)[index_a][bS[2]];
5942 tc[3] = (tc0_table+52)[index_a][bS[3]];
5943 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5945 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5948 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5949 const int index_a = qp + h->slice_alpha_c0_offset;
5950 const int alpha = (alpha_table+52)[index_a];
5951 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5955 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5956 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5957 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5958 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5959 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5961 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar deblocking of one vertical luma edge of an MBAFF macroblock,
 * one pixel row at a time; mixed frame/field neighbours prevent use of
 * the vectorized dsputil filters.  qp[2] holds the two per-field
 * quantizers.  NOTE(review): several brace/else and declaration lines
 * are not visible in this fragmented view. */
5965 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5967 for( i = 0; i < 16; i++, pix += stride) {
5973 int bS_index = (i >> 1);
5976 bS_index |= (i & 1);
5979 if( bS[bS_index] == 0 ) {
/* pick the quantizer of the field this row belongs to */
5983 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5984 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5985 alpha = (alpha_table+52)[index_a];
5986 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal filtering, delta clipped to +-tc */
5988 if( bS[bS_index] < 4 ) {
5989 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5990 const int p0 = pix[-1];
5991 const int p1 = pix[-2];
5992 const int p2 = pix[-3];
5993 const int q0 = pix[0];
5994 const int q1 = pix[1];
5995 const int q2 = pix[2];
5997 if( FFABS( p0 - q0 ) < alpha &&
5998 FFABS( p1 - p0 ) < beta &&
5999 FFABS( q1 - q0 ) < beta ) {
6003 if( FFABS( p2 - p0 ) < beta ) {
6004 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6007 if( FFABS( q2 - q0 ) < beta ) {
6008 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6012 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6013 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6014 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6015 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong (intra) filtering */
6018 const int p0 = pix[-1];
6019 const int p1 = pix[-2];
6020 const int p2 = pix[-3];
6022 const int q0 = pix[0];
6023 const int q1 = pix[1];
6024 const int q2 = pix[2];
6026 if( FFABS( p0 - q0 ) < alpha &&
6027 FFABS( p1 - p0 ) < beta &&
6028 FFABS( q1 - q0 ) < beta ) {
6030 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6031 if( FFABS( p2 - p0 ) < beta)
6033 const int p3 = pix[-4];
/* full 3-tap strong filter on the p side */
6035 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6036 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6037 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6040 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6042 if( FFABS( q2 - q0 ) < beta)
6044 const int q3 = pix[3];
/* full 3-tap strong filter on the q side */
6046 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6047 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6048 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6051 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge too strong for full filtering: weak 2-tap fallback */
6055 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6056 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6058 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Scalar deblocking of one vertical chroma edge of an MBAFF macroblock,
 * one pixel row at a time (8 rows for 4:2:0 chroma).  NOTE(review):
 * some brace/else and declaration lines are not visible in this
 * fragmented view. */
6063 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6065 for( i = 0; i < 8; i++, pix += stride) {
6073 if( bS[bS_index] == 0 ) {
/* pick the quantizer of the field this row belongs to */
6077 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6078 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6079 alpha = (alpha_table+52)[index_a];
6080 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* bS < 4: normal filtering, chroma threshold is tc0 + 1 */
6082 if( bS[bS_index] < 4 ) {
6083 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6084 const int p0 = pix[-1];
6085 const int p1 = pix[-2];
6086 const int q0 = pix[0];
6087 const int q1 = pix[1];
6089 if( FFABS( p0 - q0 ) < alpha &&
6090 FFABS( p1 - p0 ) < beta &&
6091 FFABS( q1 - q0 ) < beta ) {
6092 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6094 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6095 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6096 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: strong chroma filtering (2-tap) */
6099 const int p0 = pix[-1];
6100 const int p1 = pix[-2];
6101 const int q0 = pix[0];
6102 const int q1 = pix[1];
6104 if( FFABS( p0 - q0 ) < alpha &&
6105 FFABS( p1 - p0 ) < beta &&
6106 FFABS( q1 - q0 ) < beta ) {
6108 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6109 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6110 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6116 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6117 const int index_a = qp + h->slice_alpha_c0_offset;
6118 const int alpha = (alpha_table+52)[index_a];
6119 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6123 tc[0] = (tc0_table+52)[index_a][bS[0]];
6124 tc[1] = (tc0_table+52)[index_a][bS[1]];
6125 tc[2] = (tc0_table+52)[index_a][bS[2]];
6126 tc[3] = (tc0_table+52)[index_a][bS[3]];
6127 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6129 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6133 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6134 const int index_a = qp + h->slice_alpha_c0_offset;
6135 const int alpha = (alpha_table+52)[index_a];
6136 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6140 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6141 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6142 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6143 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6144 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6146 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock: when conditions allow,
 * computes all boundary strengths with the vectorized
 * h264_loop_filter_strength dsp routine instead of the generic
 * per-edge logic, falling back to filter_mb() otherwise.
 * NOTE(review): several closing braces and a fallback branch are not
 * visible in this fragmented view. */
6150 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6151 MpegEncContext * const s = &h->s;
6152 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6154 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* conditions the fast path cannot handle: delegate to the full filter */
6158 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6159 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6160 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6161 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6162 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6165 assert(!FRAME_MBAFF);
/* average QPs across the left/top boundaries as the spec requires */
6167 mb_type = s->current_picture.mb_type[mb_xy];
6168 qp = s->current_picture.qscale_table[mb_xy];
6169 qp0 = s->current_picture.qscale_table[mb_xy-1];
6170 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6171 qpc = get_chroma_qp( h, 0, qp );
6172 qpc0 = get_chroma_qp( h, 0, qp0 );
6173 qpc1 = get_chroma_qp( h, 0, qp1 );
6174 qp0 = (qp + qp0 + 1) >> 1;
6175 qp1 = (qp + qp1 + 1) >> 1;
6176 qpc0 = (qpc + qpc0 + 1) >> 1;
6177 qpc1 = (qpc + qpc1 + 1) >> 1;
6178 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* all QPs below the alpha/beta activation threshold: nothing to filter */
6179 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6180 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed boundary strengths (4 on MB edges, 3 inside) */
6183 if( IS_INTRA(mb_type) ) {
6184 int16_t bS4[4] = {4,4,4,4};
6185 int16_t bS3[4] = {3,3,3,3};
6186 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6187 if( IS_8x8DCT(mb_type) ) {
6188 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6189 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6190 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6191 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6193 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6194 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6195 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6196 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6197 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6198 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6199 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6200 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6202 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6203 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6204 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6205 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6206 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6207 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6208 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6209 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS with the vectorized dsp routine */
6212 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6213 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6215 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6217 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6219 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6220 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6221 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6222 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6224 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6225 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6226 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6227 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbours force bS = 4 (3 in field pictures) on the MB edge */
6229 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6230 bSv[0][0] = 0x0004000400040004ULL;
6231 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6232 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply the per-edge filters for all edges with nonzero strength */
6234 #define FILTER(hv,dir,edge)\
6235 if(bSv[dir][edge]) {\
6236 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6238 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6239 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6245 } else if( IS_8x8DCT(mb_type) ) {
/* Apply the in-loop deblocking filter to one macroblock along one direction.
 * dir==0 filters vertical edges (left neighbour is mb_xy-1), dir==1 filters
 * horizontal edges (top neighbour is h->top_mb_xy).  For each edge a boundary
 * strength bS[0..3] is derived from intra/inter status, non-zero coefficient
 * counts and motion-vector/reference differences, then the edge is filtered
 * with filter_mb_edge{v,h} (luma) and filter_mb_edgec{v,h} (chroma).
 * NOTE(review): this excerpt elides several original lines (declarations,
 * braces, continue/return statements); do not treat it as compilable as-is. */
6265 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6266 MpegEncContext * const s = &h->s;
/* mbm_*: the neighbouring macroblock on the other side of the first edge. */
6268 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6269 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps per-list reference indices to frame numbers so references
 * from different slices can be compared; offset differs for MBAFF. */
6270 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6271 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* slice_table==0xFFFF marks "no macroblock there" (picture border):
 * start at edge 1 and skip the inter-MB edge entirely. */
6272 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* A skipped 16x16 MB has uniform motion: only the first edge can differ. */
6274 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6275 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6276 // how often to recheck mv-based bS when iterating between edges
6277 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6278 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6279 // how often to recheck mv-based bS when iterating along each edge
6280 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* The caller already filtered the first vertical edge (MBAFF special case). */
6282 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries. */
6286 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6289 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6290 && !IS_INTERLACED(mb_type)
6291 && IS_INTERLACED(mbm_type)
6293 // This is a special case in the norm where the filtering must
6294 // be done twice (one each of the field) even if we are in a
6295 // frame macroblock.
/* nnz_idx selects the bottom-row non-zero-count entries of the field
 * neighbour that face the current MB's top edge. */
6297 static const int nnz_idx[4] = {4,5,6,3};
6298 unsigned int tmp_linesize = 2 * linesize;
6299 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6300 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* Filter once per field of the interlaced MB pair above. */
6305 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6306 if( IS_INTRA(mb_type) ||
6307 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6308 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6310 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6311 for( i = 0; i < 4; i++ ) {
6312 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6313 mbn_nnz[nnz_idx[i]] != 0 )
6319 // Do not use s->qscale as luma quantizer because it has not the same
6320 // value in IPCM macroblocks.
6321 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6322 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6323 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6324 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6325 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6326 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6327 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6328 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Main edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal. */
6335 for( edge = start; edge < edges; edge++ ) {
6336 /* mbn_xy: neighbor macroblock */
6337 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6338 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6339 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform: only every second edge exists. */
6343 if( (edge&1) && IS_8x8DCT(mb_type) )
6346 if( IS_INTRA(mb_type) ||
6347 IS_INTRA(mbn_type) ) {
6350 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6351 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6360 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* Inter MBs: skip mv-based recheck on edges covered by mask_edge. */
6365 if( edge & mask_edge ) {
6366 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* Frame/field MB mismatch in MBAFF: spec mandates bS=1. */
6369 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6370 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* Whole edge shares one partition pair: compute bS once for all 4 samples. */
6373 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6374 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6375 int bn_idx= b_idx - (dir ? 8:1);
6378 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
/* bS=1 if references differ or any mv component differs by >= 1 luma
 * sample horizontally (4 in quarter-pel) or mvy_limit vertically. */
6379 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6380 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6381 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare list l against the opposite list ln. */
6384 if(h->slice_type_nos == FF_B_TYPE && v){
6386 for( l = 0; !v && l < 2; l++ ) {
6388 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6389 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6390 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6394 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* Otherwise: per-4x4-block bS along the edge. */
6400 for( i = 0; i < 4; i++ ) {
6401 int x = dir == 0 ? edge : i;
6402 int y = dir == 0 ? i : edge;
6403 int b_idx= 8 + 4 + x + 8*y;
6404 int bn_idx= b_idx - (dir ? 8:1);
/* Any non-zero coefficients on either side force bS=2. */
6406 if( h->non_zero_count_cache[b_idx] |
6407 h->non_zero_count_cache[bn_idx] ) {
6413 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6414 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6415 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6416 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6422 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6424 for( l = 0; l < 2; l++ ) {
6426 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6427 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6428 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* All-zero strengths: nothing to filter on this edge. */
6437 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6442 // Do not use s->qscale as luma quantizer because it has not the same
6443 // value in IPCM macroblocks.
6444 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6445 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6446 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6447 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6449 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
/* Chroma is half resolution: only even luma edges have a chroma edge. */
6450 if( (edge&1) == 0 ) {
6451 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6452 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6453 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6454 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6457 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6458 if( (edge&1) == 0 ) {
6459 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6460 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6461 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6462 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Full (non-fast) deblocking of one macroblock: optionally skips filtering
 * entirely for low-QP macroblocks, patches non-zero-count caches for the
 * CAVLC 8x8-transform case, handles the MBAFF mixed frame/field first
 * vertical edge, then delegates both directions to filter_mb_dir().
 * NOTE(review): this excerpt elides several original lines; not compilable
 * as shown. */
6468 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6469 MpegEncContext * const s = &h->s;
6470 const int mb_xy= mb_x + mb_y*s->mb_stride;
6471 const int mb_type = s->current_picture.mb_type[mb_xy];
/* Field MBs use a tighter vertical-mv threshold (half-sample units). */
6472 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6473 int first_vertical_edge_done = 0;
6476 //for sufficiently low qp, filtering wouldn't do anything
6477 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6479 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6480 int qp = s->current_picture.qscale_table[mb_xy];
/* Skip the whole MB when the averaged QP with both neighbours stays below
 * the threshold where alpha/beta become zero. */
6482 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6483 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6488 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6489 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6490 int top_type, left_type[2];
6491 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6492 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6493 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* Rebuild the nnz cache rows from the cbp bits of 8x8-transformed
 * neighbours, since their stored nnz has different semantics. */
6495 if(IS_8x8DCT(top_type)){
6496 h->non_zero_count_cache[4+8*0]=
6497 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6498 h->non_zero_count_cache[6+8*0]=
6499 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6501 if(IS_8x8DCT(left_type[0])){
6502 h->non_zero_count_cache[3+8*1]=
6503 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6505 if(IS_8x8DCT(left_type[1])){
6506 h->non_zero_count_cache[3+8*3]=
6507 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6510 if(IS_8x8DCT(mb_type)){
6511 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6512 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6514 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6515 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6517 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6518 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6520 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6521 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6526 // left mb is in picture
6527 && h->slice_table[mb_xy-1] != 0xFFFF
6528 // and current and left pair do not have the same interlaced type
6529 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6530 // and left mb is in the same slice if deblocking_filter == 2
6531 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6532 /* First vertical edge is different in MBAFF frames
6533 * There are 8 different bS to compute and 2 different Qp
6535 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6536 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6541 int mb_qp, mbn0_qp, mbn1_qp;
6543 first_vertical_edge_done = 1;
6545 if( IS_INTRA(mb_type) )
6546 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
/* Mixed frame/field edge: 8 strengths, one per 2-line segment. */
6548 for( i = 0; i < 8; i++ ) {
6549 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6551 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6553 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6554 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6555 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6557 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* Average QP with each of the two left-pair macroblocks separately. */
6564 mb_qp = s->current_picture.qscale_table[mb_xy];
6565 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6566 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6567 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6568 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6569 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6570 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6571 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6572 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6573 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6574 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6575 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6576 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6579 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6580 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6581 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6582 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6583 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* Normal path: filter vertical (dir 0) then horizontal (dir 1) edges. */
6587 for( dir = 0; dir < 2; dir++ )
6588 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6590 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6591 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/* Thread entry point: decode one slice's macroblocks.  Three paths:
 * CABAC (init contexts, loop decode_mb_cabac), CAVLC (loop decode_mb_cavlc),
 * and a partitioned/"simple" fallback loop using decode_mb.  Each path
 * advances s->mb_x/s->mb_y, reports decoded/errored regions to the error
 * concealment via ff_er_add_slice(), and returns 0 on success / -1 on error.
 * NOTE(review): this excerpt elides several original lines; not compilable
 * as shown. */
6595 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6596 H264Context *h = *(void**)arg;
6597 MpegEncContext * const s = &h->s;
/* part_mask restricts which error flags apply when data partitioning is on. */
6598 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6602 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6603 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6605 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header. */
6609 align_get_bits( &s->gb );
6612 ff_init_cabac_states( &h->cabac);
6613 ff_init_cabac_decoder( &h->cabac,
6614 s->gb.buffer + get_bits_count(&s->gb)/8,
6615 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6616 /* calculate pre-state */
/* Initialize all 460 CABAC context states from the QP-dependent tables. */
6617 for( i= 0; i < 460; i++ ) {
6619 if( h->slice_type_nos == FF_I_TYPE )
6620 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6622 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* Pack state as (probability index << 1) | MPS bit. */
6625 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6627 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6632 int ret = decode_mb_cabac(h);
6634 //STOP_TIMER("decode_mb_cabac")
6636 if(ret>=0) hl_decode_mb(h);
6638 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6641 ret = decode_mb_cabac(h);
6643 if(ret>=0) hl_decode_mb(h);
/* end_of_slice_flag terminates the CABAC slice. */
6646 eos = get_cabac_terminate( &h->cabac );
/* Allow up to 2 bytes of overread before declaring bitstream corruption. */
6648 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6649 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6650 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6654 if( ++s->mb_x >= s->mb_width ) {
6656 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6658 if(FIELD_OR_MBAFF_PICTURE) {
6663 if( eos || s->mb_y >= s->mb_height ) {
6664 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6665 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path. */
6672 int ret = decode_mb_cavlc(h);
6674 if(ret>=0) hl_decode_mb(h);
6676 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6678 ret = decode_mb_cavlc(h);
6680 if(ret>=0) hl_decode_mb(h);
6685 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6686 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6691 if(++s->mb_x >= s->mb_width){
6693 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6695 if(FIELD_OR_MBAFF_PICTURE) {
6698 if(s->mb_y >= s->mb_height){
6699 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* Exactly consumed bitstream => clean slice end; otherwise an error. */
6701 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6702 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6713 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6714 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6715 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6716 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6720 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* Fallback/simple decode loop. */
6729 for(;s->mb_y < s->mb_height; s->mb_y++){
6730 for(;s->mb_x < s->mb_width; s->mb_x++){
6731 int ret= decode_mb(h);
6736 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6737 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6742 if(++s->mb_x >= s->mb_width){
6744 if(++s->mb_y >= s->mb_height){
6745 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line is garbled in this copy ("s->?gb", "s->gb?.");
 * presumably it should read
 *   if(get_bits_count(&s->gb) >= s->gb.size_in_bits){
 * consistent with the identical check at original line 6713 — confirm
 * against the upstream source before relying on it. */
6757 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6758 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6759 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6763 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6770 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6773 return -1; //not reached
/* Parse a picture-timing SEI message (H.264 Annex D): CPB/DPB delays when
 * HRD parameters are present, then pic_struct and up to num_clock_ts
 * clock timestamps, whose sub-fields are mostly skipped.
 * NOTE(review): some original lines (declarations, returns, closing braces)
 * are elided in this excerpt. */
6776 static int decode_picture_timing(H264Context *h){
6777 MpegEncContext * const s = &h->s;
/* Delay fields exist only if either HRD was signalled in the SPS VUI. */
6778 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6779 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6780 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6782 if(h->sps.pic_struct_present_flag){
6783 unsigned int i, num_clock_ts;
6784 h->sei_pic_struct = get_bits(&s->gb, 4);
/* Reject pic_struct values beyond the last defined one (frame tripling). */
6786 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6789 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6791 for (i = 0 ; i < num_clock_ts ; i++){
6792 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6793 unsigned int full_timestamp_flag;
6794 skip_bits(&s->gb, 2); /* ct_type */
6795 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6796 skip_bits(&s->gb, 5); /* counting_type */
6797 full_timestamp_flag = get_bits(&s->gb, 1);
6798 skip_bits(&s->gb, 1); /* discontinuity_flag */
6799 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6800 skip_bits(&s->gb, 8); /* n_frames */
6801 if(full_timestamp_flag){
6802 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6803 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6804 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* Without a full timestamp, each component is individually flagged. */
6806 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6807 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6808 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6809 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6810 if(get_bits(&s->gb, 1)) /* hours_flag */
6811 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6815 if(h->sps.time_offset_length > 0)
6816 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/* Parse an unregistered-user-data SEI payload of 'size' bytes.  Reads up to
 * sizeof(user_data)-1 bytes, scans for an x264 version banner to record the
 * encoder build (used for bug workarounds), and skips any remainder.
 * NOTE(review): some original lines are elided in this excerpt. */
6823 static int decode_unregistered_user_data(H264Context *h, int size){
6824 MpegEncContext * const s = &h->s;
/* 16-byte UUID prefix + up to 256 bytes of payload text. */
6825 uint8_t user_data[16+256];
6831 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6832 user_data[i]= get_bits(&s->gb, 8);
/* Payload text starts after the 16-byte UUID. */
6836 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6837 if(e==1 && build>=0)
6838 h->x264_build= build;
6840 if(s->avctx->debug & FF_DEBUG_BUGS)
6841 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* Skip any payload bytes beyond the local buffer. */
6844 skip_bits(&s->gb, 8);
/* Parse a recovery-point SEI message: store the frame count after which
 * output is correct, and skip the remaining three flag fields. */
6849 static int decode_recovery_point(H264Context *h){
6850 MpegEncContext * const s = &h->s;
6852 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6853 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/* Parse a buffering-period SEI message: validate the referenced SPS, then
 * read initial_cpb_removal_delay for each scheduler selection index of
 * whichever HRD (NAL and/or VCL) the SPS declares; the paired
 * ..._delay_offset fields are skipped.  Sets sei_buffering_period_present.
 * NOTE(review): some original lines are elided in this excerpt. */
6858 static int decode_buffering_period(H264Context *h){
6859 MpegEncContext * const s = &h->s;
6860 unsigned int sps_id;
6864 sps_id = get_ue_golomb_31(&s->gb);
/* The SPS must have been received already; its HRD fields drive parsing. */
6865 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6866 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6869 sps = h->sps_buffers[sps_id];
6871 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6872 if (sps->nal_hrd_parameters_present_flag) {
6873 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6874 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6875 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6878 if (sps->vcl_hrd_parameters_present_flag) {
6879 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6880 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6881 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6885 h->sei_buffering_period_present = 1;
/* Top-level SEI NAL parser: iterate over SEI messages while enough bits
 * remain, decode type and size (each encoded as a run of 0xFF bytes plus a
 * terminator per Annex D), dispatch to the per-type decoder, and skip
 * unknown payloads.  Returns 0 on success.
 * NOTE(review): some original lines are elided in this excerpt. */
6889 int ff_h264_decode_sei(H264Context *h){
6890 MpegEncContext * const s = &h->s;
6892 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum 255 for every 0xFF byte, then add the final byte. */
6897 type+= show_bits(&s->gb, 8);
6898 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same ff_byte escape coding. */
6902 size+= show_bits(&s->gb, 8);
6903 }while(get_bits(&s->gb, 8) == 255);
6906 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6907 if(decode_picture_timing(h) < 0)
6910 case SEI_TYPE_USER_DATA_UNREGISTERED:
6911 if(decode_unregistered_user_data(h, size) < 0)
6914 case SEI_TYPE_RECOVERY_POINT:
6915 if(decode_recovery_point(h) < 0)
6918 case SEI_BUFFERING_PERIOD:
6919 if(decode_buffering_period(h) < 0)
/* Unknown SEI types are skipped wholesale. */
6923 skip_bits(&s->gb, 8*size);
6926 //FIXME check bits here
6927 align_get_bits(&s->gb);
/* Parse hrd_parameters() (H.264 Annex E) into the SPS: per-CPB bit-rate and
 * size values are consumed but discarded; the delay field lengths and
 * cpb_cnt are stored for later SEI parsing.  Returns negative on invalid
 * cpb_cnt.
 * NOTE(review): some original lines are elided in this excerpt. */
6933 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6934 MpegEncContext * const s = &h->s;
6936 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* cpb_cnt_minus1 must be 0..31. */
6938 if(cpb_count > 32U){
6939 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6943 get_bits(&s->gb, 4); /* bit_rate_scale */
6944 get_bits(&s->gb, 4); /* cpb_size_scale */
6945 for(i=0; i<cpb_count; i++){
6946 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6947 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6948 get_bits1(&s->gb); /* cbr_flag */
/* Field bit-lengths used when reading buffering-period / pic-timing SEI. */
6950 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6951 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6952 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6953 sps->time_offset_length = get_bits(&s->gb, 5);
6954 sps->cpb_cnt = cpb_count;
/* Parse vui_parameters() (H.264 Annex E) into the SPS: sample aspect ratio,
 * video signal description (mostly skipped), timing info, NAL/VCL HRD
 * parameters, pic_struct flag and bitstream restrictions.  Most fields that
 * the decoder does not use are read and discarded to keep bit alignment.
 * NOTE(review): some original lines are elided in this excerpt. */
6958 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6959 MpegEncContext * const s = &h->s;
6960 int aspect_ratio_info_present_flag;
6961 unsigned int aspect_ratio_idc;
6963 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6965 if( aspect_ratio_info_present_flag ) {
6966 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries an explicit 16+16-bit ratio; other idc values
 * index the predefined pixel_aspect table. */
6967 if( aspect_ratio_idc == EXTENDED_SAR ) {
6968 sps->sar.num= get_bits(&s->gb, 16);
6969 sps->sar.den= get_bits(&s->gb, 16);
6970 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6971 sps->sar= pixel_aspect[aspect_ratio_idc];
6973 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6980 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6982 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6983 get_bits1(&s->gb); /* overscan_appropriate_flag */
6986 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6987 get_bits(&s->gb, 3); /* video_format */
6988 get_bits1(&s->gb); /* video_full_range_flag */
6989 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6990 get_bits(&s->gb, 8); /* colour_primaries */
6991 get_bits(&s->gb, 8); /* transfer_characteristics */
6992 get_bits(&s->gb, 8); /* matrix_coefficients */
6996 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6997 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6998 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7001 sps->timing_info_present_flag = get_bits1(&s->gb);
7002 if(sps->timing_info_present_flag){
7003 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7004 sps->time_scale = get_bits_long(&s->gb, 32);
7005 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* Both HRD blocks share the same parser; either may be present. */
7008 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7009 if(sps->nal_hrd_parameters_present_flag)
7010 if(decode_hrd_parameters(h, sps) < 0)
7012 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7013 if(sps->vcl_hrd_parameters_present_flag)
7014 if(decode_hrd_parameters(h, sps) < 0)
7016 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7017 get_bits1(&s->gb); /* low_delay_hrd_flag */
7018 sps->pic_struct_present_flag = get_bits1(&s->gb);
7020 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7021 if(sps->bitstream_restriction_flag){
7022 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7023 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7024 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7025 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7026 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
/* num_reorder_frames bounds output reordering depth. */
7027 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7028 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7030 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7031 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/* Parse one scaling list of 'size' (16 or 64) entries into 'factors'.
 * If the list is absent, copy fallback_list; if the first delta yields 0,
 * use the JVT default list; otherwise decode delta-coded values in zig-zag
 * order, repeating the last value once a delta of "next==0" is seen.
 * NOTE(review): some original lines (else branches, break) are elided in
 * this excerpt. */
7039 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7040 const uint8_t *jvt_list, const uint8_t *fallback_list){
7041 MpegEncContext * const s = &h->s;
7042 int i, last = 8, next = 8;
/* 4x4 lists use the frame zig-zag scan; 8x8 lists use ff_zigzag_direct. */
7043 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7044 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7045 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7047 for(i=0;i<size;i++){
7049 next = (last + get_se_golomb(&s->gb)) & 0xff;
7050 if(!i && !next){ /* matrix not written, we use the preset one */
7051 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat last value for the rest of the list". */
7054 last = factors[scan[i]] = next ? next : last;
/* Parse the full set of scaling matrices for an SPS (is_sps!=0) or PPS.
 * For a PPS, fallback lists come from the SPS matrices when the SPS carried
 * them; otherwise the JVT defaults are used.  Lists 1,2 (intra Cr/Cb) and
 * 4,5 (inter Cr/Cb) fall back to the previously decoded list, per the spec.
 * NOTE(review): some original lines (closing braces) are elided in this
 * excerpt. */
7058 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7059 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7060 MpegEncContext * const s = &h->s;
7061 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7062 const uint8_t *fallback[4] = {
7063 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7064 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7065 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7066 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag gates the whole block. */
7068 if(get_bits1(&s->gb)){
7069 sps->scaling_matrix_present |= is_sps;
7070 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7071 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7072 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7073 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7074 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7075 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in the SPS or when the PPS enables 8x8 transform. */
7076 if(is_sps || pps->transform_8x8_mode){
7077 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7078 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* Parse a sequence parameter set NAL: profile/level, sps_id, high-profile
 * extras (chroma format, bit depths, scaling matrices), POC configuration,
 * reference-frame count, picture dimensions, interlace flags, cropping and
 * optional VUI; validates ranges and stores the result in h->sps_buffers.
 * NOTE(review): some original lines (error returns, closing braces) are
 * elided in this excerpt. */
7083 int ff_h264_decode_seq_parameter_set(H264Context *h){
7084 MpegEncContext * const s = &h->s;
7085 int profile_idc, level_idc;
7086 unsigned int sps_id;
7090 profile_idc= get_bits(&s->gb, 8);
7091 get_bits1(&s->gb); //constraint_set0_flag
7092 get_bits1(&s->gb); //constraint_set1_flag
7093 get_bits1(&s->gb); //constraint_set2_flag
7094 get_bits1(&s->gb); //constraint_set3_flag
7095 get_bits(&s->gb, 4); // reserved
7096 level_idc= get_bits(&s->gb, 8);
7097 sps_id= get_ue_golomb_31(&s->gb);
7099 if(sps_id >= MAX_SPS_COUNT) {
7100 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7103 sps= av_mallocz(sizeof(SPS));
7107 sps->profile_idc= profile_idc;
7108 sps->level_idc= level_idc;
/* Default: flat scaling matrices (all 16) until/unless the SPS sends some. */
7110 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7111 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7112 sps->scaling_matrix_present = 0;
7114 if(sps->profile_idc >= 100){ //high profile
7115 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7116 if(sps->chroma_format_idc == 3)
7117 sps->residual_color_transform_flag = get_bits1(&s->gb);
7118 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7119 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7120 sps->transform_bypass = get_bits1(&s->gb);
7121 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* Non-high profiles are implicitly 4:2:0. */
7123 sps->chroma_format_idc= 1;
7126 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7127 sps->poc_type= get_ue_golomb_31(&s->gb);
7129 if(sps->poc_type == 0){ //FIXME #define
7130 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7131 } else if(sps->poc_type == 1){//FIXME #define
7132 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7133 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7134 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7135 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7137 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7138 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7142 for(i=0; i<sps->poc_cycle_length; i++)
7143 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7144 }else if(sps->poc_type != 2){
7145 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7149 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7150 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7151 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7154 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7155 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7156 sps->mb_height= get_ue_golomb(&s->gb) + 1;
/* Guard against 16*mb_{width,height} overflowing int and absurd sizes. */
7157 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7158 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7159 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7163 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7164 if(!sps->frame_mbs_only_flag)
7165 sps->mb_aff= get_bits1(&s->gb);
7169 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7171 #ifndef ALLOW_INTERLACE
7173 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7175 sps->crop= get_bits1(&s->gb);
7177 sps->crop_left = get_ue_golomb(&s->gb);
7178 sps->crop_right = get_ue_golomb(&s->gb);
7179 sps->crop_top = get_ue_golomb(&s->gb);
7180 sps->crop_bottom= get_ue_golomb(&s->gb);
7181 if(sps->crop_left || sps->crop_top){
7182 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7184 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7185 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7191 sps->crop_bottom= 0;
7194 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7195 if( sps->vui_parameters_present_flag )
7196 decode_vui_parameters(h, sps);
7198 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7199 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7200 sps_id, sps->profile_idc, sps->level_idc,
7202 sps->ref_frame_count,
7203 sps->mb_width, sps->mb_height,
7204 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7205 sps->direct_8x8_inference_flag ? "8B8" : "",
7206 sps->crop_left, sps->crop_right,
7207 sps->crop_top, sps->crop_bottom,
7208 sps->vui_parameters_present_flag ? "VUI" : "",
7209 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Replace any previously stored SPS with the same id. */
7213 av_free(h->sps_buffers[sps_id]);
7214 h->sps_buffers[sps_id]= sps;
/* Fill chroma QP lookup table 't' of the PPS: for each luma QP 0..51, map
 * through the chroma_qp table after applying the chroma_qp_index_offset
 * 'index', clipped to the valid 0..51 range.
 * NOTE(review): the return-type line and surrounding braces are elided in
 * this excerpt. */
7223 build_qp_table(PPS *pps, int t, int index)
7226 for(i = 0; i < 52; i++)
7227 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decodes a picture parameter set (PPS) RBSP from s->gb and stores it in
 * h->pps_buffers[pps_id], freeing any previous PPS with the same id.
 *
 * NOTE(review): this chunk is a lossy extraction — error-path returns,
 * the declarations of pps/i, several closing braces and the final
 * return statement were dropped; the surviving lines are kept byte-identical
 * below. The "| ... |ue(v) |" lines further down are remnants of an in-code
 * comment quoting the H.264 spec syntax tables for FMO — TODO restore the
 * surrounding comment delimiters from the pristine file.
 *
 * @param h          decoder context (bitstream reader in h->s.gb)
 * @param bit_length length of the RBSP in bits, used to detect optional
 *                   trailing fields (transform_8x8_mode etc.)
 */
7230 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7231 MpegEncContext * const s = &h->s;
7232 unsigned int pps_id= get_ue_golomb(&s->gb);
// Reject ids outside the table; the original returned an error here.
7235 if(pps_id >= MAX_PPS_COUNT) {
7236 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7240 pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7243 pps->sps_id= get_ue_golomb_31(&s->gb);
7244 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7245 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7249 pps->cabac= get_bits1(&s->gb);
7250 pps->pic_order_present= get_bits1(&s->gb);
7251 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// More than one slice group means FMO, which this decoder does not implement.
7252 if(pps->slice_group_count > 1 ){
7253 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7254 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7255 switch(pps->mb_slice_group_map_type){
7258 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7259 | run_length[ i ] |1 |ue(v) |
7264 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7266 | top_left_mb[ i ] |1 |ue(v) |
7267 | bottom_right_mb[ i ] |1 |ue(v) |
7275 | slice_group_change_direction_flag |1 |u(1) |
7276 | slice_group_change_rate_minus1 |1 |ue(v) |
7281 | slice_group_id_cnt_minus1 |1 |ue(v) |
7282 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7284 | slice_group_id[ i ] |1 |u(v) |
// num_ref_idx_*_active; clamped to the 32-entry reference lists.
7289 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7290 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7291 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7292 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7296 pps->weighted_pred= get_bits1(&s->gb);
7297 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
// pic_init_qp_minus26 / pic_init_qs_minus26 are coded relative to 26.
7298 pps->init_qp= get_se_golomb(&s->gb) + 26;
7299 pps->init_qs= get_se_golomb(&s->gb) + 26;
7300 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7301 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7302 pps->constrained_intra_pred= get_bits1(&s->gb);
7303 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7305 pps->transform_8x8_mode= 0;
7306 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; the optional PPS extension below may override them.
7307 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7308 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// If bits remain, the High-profile PPS extension fields are present.
7310 if(get_bits_count(&s->gb) < bit_length){
7311 pps->transform_8x8_mode= get_bits1(&s->gb);
7312 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7313 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
// Otherwise the Cr offset defaults to the Cb offset (else-branch in the original).
7315 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7318 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7319 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7320 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7321 h->pps.chroma_qp_diff= 1;
7323 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7324 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7325 pps_id, pps->sps_id,
7326 pps->cabac ? "CABAC" : "CAVLC",
7327 pps->slice_group_count,
7328 pps->ref_count[0], pps->ref_count[1],
7329 pps->weighted_pred ? "weighted" : "",
7330 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7331 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7332 pps->constrained_intra_pred ? "CONSTR" : "",
7333 pps->redundant_pic_cnt_present ? "REDU" : "",
7334 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with this id.
7338 av_free(h->pps_buffers[pps_id]);
7339 h->pps_buffers[pps_id]= pps;
// NOTE(review): the /** opener of this doxygen block and several interior
// lines (declarations of i and hx, the else branch, closing braces) were
// lost in extraction; surviving lines kept byte-identical.
7347 * Call decode_slice() for each context.
7349 * @param h h264 master context
7350 * @param context_count number of contexts to execute
7352 static void execute_decode_slices(H264Context *h, int context_count){
7353 MpegEncContext * const s = &h->s;
7354 AVCodecContext * const avctx= s->avctx;
// VDPAU hardware decoding bypasses the software slice decoder (original returned here).
7358 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
// Single context: decode directly; multiple: fan out via avctx->execute().
7360 if(context_count == 1) {
7361 decode_slice(avctx, &h);
7363 for(i = 1; i < context_count; i++) {
7364 hx = h->thread_context[i];
7365 hx->s.error_recognition = avctx->error_recognition;
7366 hx->s.error_count = 0;
7369 avctx->execute(avctx, (void *)decode_slice,
7370 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7372 /* pull back stuff from slices to master context */
7373 hx = h->thread_context[context_count - 1];
7374 s->mb_x = hx->s.mb_x;
7375 s->mb_y = hx->s.mb_y;
7376 s->dropable = hx->s.dropable;
7377 s->picture_structure = hx->s.picture_structure;
7378 for(i = 1; i < context_count; i++)
7379 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Splits the input buffer into NAL units and dispatches each to the
 * appropriate parser (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...),
 * batching slices across thread contexts and flushing them via
 * execute_decode_slices().
 *
 * NOTE(review): lossy extraction — the main `for(;;)` loop header, many
 * case labels, error-path gotos/returns, closing braces and several
 * declarations (buf_index, nalsize, i, err, ptr, dst_length, consumed,
 * bit_length) were dropped; surviving lines kept byte-identical.
 *
 * @param h        master decoder context
 * @param buf      input bitstream (avcC length-prefixed if h->is_avc,
 *                 otherwise Annex-B with 00 00 01 start codes)
 * @param buf_size size of buf in bytes
 * @return bytes consumed (negative on error in the original — TODO confirm)
 */
7384 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7385 MpegEncContext * const s = &h->s;
7386 AVCodecContext * const avctx= s->avctx;
7388 H264Context *hx; ///< thread context
7389 int context_count = 0;
7391 h->max_contexts = avctx->thread_count;
// Debug hexdump of the first bytes (guarded by a debug flag in the original).
7394 for(i=0; i<50; i++){
7395 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Without CHUNKS we restart a fresh picture on every call.
7398 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7399 h->current_slice = 0;
7400 if (!s->first_field)
7401 s->current_picture_ptr= NULL;
// --- per-NAL loop body (loop header lost in extraction) ---
7413 if(buf_index >= buf_size) break;
// AVC mode: big-endian length prefix of h->nal_length_size bytes.
7415 for(i = 0; i < h->nal_length_size; i++)
7416 nalsize = (nalsize << 8) | buf[buf_index++];
7417 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7422 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7427 // start code prefix search
7428 for(; buf_index + 3 < buf_size; buf_index++){
7429 // This should always succeed in the first iteration.
7430 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7434 if(buf_index+3 >= buf_size) break;
// Each slice may be parsed into a different thread context.
7439 hx = h->thread_context[context_count];
7441 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7442 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes before computing the RBSP bit length.
7445 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7447 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7449 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7450 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// Mismatch between declared and consumed size: only an error if non-zero padding follows.
7453 if (h->is_avc && (nalsize != consumed)){
7454 int i, debug_level = AV_LOG_DEBUG;
7455 for (i = consumed; i < nalsize; i++)
7456 if (buf[buf_index+i])
7457 debug_level = AV_LOG_ERROR;
7458 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7462 buf_index += consumed;
// Skip non-reference NALs when hurry_up / skip_frame asks for it.
7464 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7465 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7470 switch(hx->nal_unit_type){
// case NAL_IDR_SLICE (label lost): IDR must not mix with non-IDR slices.
7472 if (h->nal_unit_type != NAL_IDR_SLICE) {
7473 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7476 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular slice: parse header, then decide whether to decode or skip.
7478 init_get_bits(&hx->s.gb, ptr, bit_length);
7480 hx->inter_gb_ptr= &hx->s.gb;
7481 hx->s.data_partitioning = 0;
7483 if((err = decode_slice_header(hx, h)))
7486 s->current_picture_ptr->key_frame |=
7487 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7488 (h->sei_recovery_frame_cnt >= 0);
7489 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7490 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7491 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7492 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7493 && avctx->skip_frame < AVDISCARD_ALL){
7494 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7495 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7496 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7497 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
// Data-partitioned slices (DPA/DPB/DPC case labels lost in extraction).
7503 init_get_bits(&hx->s.gb, ptr, bit_length);
7505 hx->inter_gb_ptr= NULL;
7506 hx->s.data_partitioning = 1;
7508 err = decode_slice_header(hx, h);
7511 init_get_bits(&hx->intra_gb, ptr, bit_length);
7512 hx->intra_gb_ptr= &hx->intra_gb;
7515 init_get_bits(&hx->inter_gb, ptr, bit_length);
7516 hx->inter_gb_ptr= &hx->inter_gb;
// A partitioned slice is only decodable once partition A arrived and context is up.
7518 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7519 && s->context_initialized
7521 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7522 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7523 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7524 && avctx->skip_frame < AVDISCARD_ALL)
// case NAL_SEI (label lost):
7528 init_get_bits(&s->gb, ptr, bit_length);
7529 ff_h264_decode_sei(h);
// case NAL_SPS (label lost):
7532 init_get_bits(&s->gb, ptr, bit_length);
7533 ff_h264_decode_seq_parameter_set(h);
7535 if(s->flags& CODEC_FLAG_LOW_DELAY)
7538 if(avctx->has_b_frames < 2)
7539 avctx->has_b_frames= !s->low_delay;
// case NAL_PPS (label lost):
7542 init_get_bits(&s->gb, ptr, bit_length);
7544 ff_h264_decode_picture_parameter_set(h, bit_length);
7548 case NAL_END_SEQUENCE:
7549 case NAL_END_STREAM:
7550 case NAL_FILLER_DATA:
7552 case NAL_AUXILIARY_SLICE:
7555 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush a full batch of parsed slices to the decoder threads.
7558 if(context_count == h->max_contexts) {
7559 execute_decode_slices(h, context_count);
7564 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7566 /* Slice could not be decoded in parallel mode, copy down
7567 * NAL unit stuff to context 0 and restart. Note that
7568 * rbsp_buffer is not transferred, but since we no longer
7569 * run in parallel mode this should not be an issue. */
7570 h->nal_unit_type = hx->nal_unit_type;
7571 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any slices still pending after the loop.
7577 execute_decode_slices(h, context_count);
7582 * returns the number of bytes consumed for building the current frame
7584 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7585 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7586 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level AVCodec decode callback: parses the packet's NAL units, then
 * performs end-of-picture bookkeeping (reference marking, interlace/SEI
 * signalling, B-frame reordering through h->delayed_pic[]) and outputs at
 * most one frame into *pict / *data_size.
 *
 * NOTE(review): lossy extraction — declarations (buf_index, out, out_idx,
 * i), many braces, returns and else-branches are missing; surviving lines
 * kept byte-identical.
 *
 * @param avctx     codec context (priv_data is the H264Context)
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a frame is returned, else 0
 * @param buf       input packet data
 * @param buf_size  input packet size; 0 means flush buffered frames
 * @return bytes consumed, via get_consumed_bytes()
 */
7591 static int decode_frame(AVCodecContext *avctx,
7592 void *data, int *data_size,
7593 const uint8_t *buf, int buf_size)
7595 H264Context *h = avctx->priv_data;
7596 MpegEncContext *s = &h->s;
7597 AVFrame *pict = data;
7600 s->flags= avctx->flags;
7601 s->flags2= avctx->flags2;
7603 /* end of stream, output what is still in the buffers */
7604 if (buf_size == 0) {
7608 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC, stopping at key frames.
7609 out = h->delayed_pic[0];
7611 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7612 if(h->delayed_pic[i]->poc < out->poc){
7613 out = h->delayed_pic[i];
// Compact the delayed-picture list after removing the output entry.
7617 for(i=out_idx; h->delayed_pic[i]; i++)
7618 h->delayed_pic[i] = h->delayed_pic[i+1];
7621 *data_size = sizeof(AVFrame);
7622 *pict= *(AVFrame*)out;
// One-time parse of the avcC extradata box (MP4/MOV style streams).
7628 if(h->is_avc && !h->got_avcC) {
7629 int i, cnt, nalsize;
7630 unsigned char *p = avctx->extradata;
7631 if(avctx->extradata_size < 7) {
7632 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7636 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7639 /* sps and pps in the avcC always have length coded with 2 bytes,
7640 so put a fake nal_length_size = 2 while parsing them */
7641 h->nal_length_size = 2;
7642 // Decode sps from avcC
7643 cnt = *(p+5) & 0x1f; // Number of sps
7645 for (i = 0; i < cnt; i++) {
7646 nalsize = AV_RB16(p) + 2;
7647 if(decode_nal_units(h, p, nalsize) < 0) {
7648 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7653 // Decode pps from avcC
7654 cnt = *(p++); // Number of pps
7655 for (i = 0; i < cnt; i++) {
7656 nalsize = AV_RB16(p) + 2;
7657 if(decode_nal_units(h, p, nalsize) != nalsize) {
7658 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7663 // Now store right nal length size, that will be use to parse all other nals
7664 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7665 // Do not reparse avcC
// Annex-B streams may carry SPS/PPS in plain extradata instead.
7669 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7670 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7675 buf_index=decode_nal_units(h, buf, buf_size);
7679 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7680 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7681 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture is complete: run end-of-picture bookkeeping and output logic.
7685 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7686 Picture *out = s->current_picture_ptr;
7687 Picture *cur = s->current_picture_ptr;
7688 int i, pics, cross_idr, out_of_order, out_idx;
7692 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7693 s->current_picture_ptr->pict_type= s->pict_type;
7695 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7696 ff_vdpau_h264_set_reference_frames(s);
// Apply memory-management control operations, then roll POC state forward.
7699 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7700 h->prev_poc_msb= h->poc_msb;
7701 h->prev_poc_lsb= h->poc_lsb;
7703 h->prev_frame_num_offset= h->frame_num_offset;
7704 h->prev_frame_num= h->frame_num;
7706 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7707 ff_vdpau_h264_picture_complete(s);
7710 * FIXME: Error handling code does not seem to support interlaced
7711 * when slices span multiple rows
7712 * The ff_er_add_slice calls don't work right for bottom
7713 * fields; they cause massive erroneous error concealing
7714 * Error marking covers both fields (top and bottom).
7715 * This causes a mismatched s->error_count
7716 * and a bad error table. Further, the error count goes to
7717 * INT_MAX when called for bottom field, because mb_y is
7718 * past end by one (callers fault) and resync_mb_y != 0
7719 * causes problems for the first MB line, too.
// Reset per-access-unit SEI state.
7725 h->sei_recovery_frame_cnt = -1;
7726 h->sei_dpb_output_delay = 0;
7727 h->sei_cpb_removal_delay = -1;
7728 h->sei_buffering_period_present = 0;
7730 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7731 /* Wait for second field. */
7735 cur->repeat_pict = 0;
7737 /* Signal interlacing information externally. */
7738 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7739 if(h->sps.pic_struct_present_flag){
7740 switch (h->sei_pic_struct)
7742 case SEI_PIC_STRUCT_FRAME:
7743 cur->interlaced_frame = 0;
7745 case SEI_PIC_STRUCT_TOP_FIELD:
7746 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7747 case SEI_PIC_STRUCT_TOP_BOTTOM:
7748 case SEI_PIC_STRUCT_BOTTOM_TOP:
7749 cur->interlaced_frame = 1;
7751 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7752 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7753 // Signal the possibility of telecined film externally (pic_struct 5,6)
7754 // From these hints, let the applications decide if they apply deinterlacing.
7755 cur->repeat_pict = 1;
7756 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7758 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7759 // Force progressive here, as doubling interlaced frame is a bad idea.
7760 cur->interlaced_frame = 0;
7761 cur->repeat_pict = 2;
7763 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7764 cur->interlaced_frame = 0;
7765 cur->repeat_pict = 4;
7769 /* Derive interlacing flag from used decoding process. */
7770 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7773 if (cur->field_poc[0] != cur->field_poc[1]){
7774 /* Derive top_field_first from field pocs. */
7775 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7777 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7778 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7779 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7780 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7781 cur->top_field_first = 1;
7783 cur->top_field_first = 0;
7785 /* Most likely progressive */
7786 cur->top_field_first = 0;
7790 //FIXME do something with unavailable reference frames
7792 /* Sort B-frames into display order */
7794 if(h->sps.bitstream_restriction_flag
7795 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7796 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7800 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7801 && !h->sps.bitstream_restriction_flag){
7802 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7807 while(h->delayed_pic[pics]) pics++;
7809 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Append current picture to the delay queue; keep it alive until output.
7811 h->delayed_pic[pics++] = cur;
7812 if(cur->reference == 0)
7813 cur->reference = DELAYED_PIC_REF;
7815 out = h->delayed_pic[0];
7817 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7818 if(h->delayed_pic[i]->poc < out->poc){
7819 out = h->delayed_pic[i];
7822 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7824 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7826 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7828 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7830 ((!cross_idr && out->poc > h->outputed_poc + 2)
7831 || cur->pict_type == FF_B_TYPE)))
// Grow the reorder delay when out-of-order output is detected.
7834 s->avctx->has_b_frames++;
7837 if(out_of_order || pics > s->avctx->has_b_frames){
7838 out->reference &= ~DELAYED_PIC_REF;
7839 for(i=out_idx; h->delayed_pic[i]; i++)
7840 h->delayed_pic[i] = h->delayed_pic[i+1];
7842 if(!out_of_order && pics > s->avctx->has_b_frames){
7843 *data_size = sizeof(AVFrame);
7845 h->outputed_poc = out->poc;
7846 *pict= *(AVFrame*)out;
7848 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7853 assert(pict->data[0] || !*data_size);
7854 ff_print_debug_info(s, pict);
7855 //printf("out %d\n", (int)pict->data[0]);
7858 /* Return the Picture timestamp as the frame number */
7859 /* we subtract 1 because it is added on utils.c */
7860 avctx->frame_number = s->picture_number - 1;
7862 return get_consumed_bytes(s, buf_index, buf_size);
7865 static inline void fill_mb_avail(H264Context *h){
7866 MpegEncContext * const s = &h->s;
7867 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7870 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7871 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7872 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7878 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7879 h->mb_avail[4]= 1; //FIXME move out
7880 h->mb_avail[5]= 0; //FIXME move out
// NOTE(review): fragments of the #ifdef TEST self-test driver (its
// `int main(void){` header, the COUNT define, variable declarations,
// START_TIMER calls, braces and return were lost in extraction).
// It exercises the exp-Golomb reader/writer, the 4x4 (I)DCT, the
// quantizer and the NAL escaping round-trip. Surviving lines kept
// byte-identical.
7888 #define SIZE (COUNT*40)
7894 // int int_temp[10000];
7896 AVCodecContext avctx;
7898 dsputil_init(&dsp, &avctx);
// --- unsigned exp-Golomb write/read round-trip ---
7900 init_put_bits(&pb, temp, SIZE);
7901 printf("testing unsigned exp golomb\n");
7902 for(i=0; i<COUNT; i++){
7904 set_ue_golomb(&pb, i);
7905 STOP_TIMER("set_ue_golomb");
7907 flush_put_bits(&pb);
7909 init_get_bits(&gb, temp, 8*SIZE);
7910 for(i=0; i<COUNT; i++){
7913 s= show_bits(&gb, 24);
7916 j= get_ue_golomb(&gb);
7918 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7921 STOP_TIMER("get_ue_golomb");
// --- signed exp-Golomb round-trip ---
7925 init_put_bits(&pb, temp, SIZE);
7926 printf("testing signed exp golomb\n");
7927 for(i=0; i<COUNT; i++){
7929 set_se_golomb(&pb, i - COUNT/2);
7930 STOP_TIMER("set_se_golomb");
7932 flush_put_bits(&pb);
7934 init_get_bits(&gb, temp, 8*SIZE);
7935 for(i=0; i<COUNT; i++){
7938 s= show_bits(&gb, 24);
7941 j= get_se_golomb(&gb);
7942 if(j != i - COUNT/2){
7943 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7946 STOP_TIMER("get_se_golomb");
// --- 4x4 transform round-trip error measurement ---
7950 printf("testing 4x4 (I)DCT\n");
7953 uint8_t src[16], ref[16];
7954 uint64_t error= 0, max_error=0;
7956 for(i=0; i<COUNT; i++){
7958 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7959 for(j=0; j<16; j++){
7960 ref[j]= random()%255;
7961 src[j]= random()%255;
7964 h264_diff_dct_c(block, src, ref, 4);
// Crude dequant approximation before the IDCT (see the /5 scaling).
7967 for(j=0; j<16; j++){
7968 // printf("%d ", block[j]);
7969 block[j]= block[j]*4;
7970 if(j&1) block[j]= (block[j]*4 + 2)/5;
7971 if(j&4) block[j]= (block[j]*4 + 2)/5;
7975 s->dsp.h264_idct_add(ref, block, 4);
7976 /* for(j=0; j<16; j++){
7977 printf("%d ", ref[j]);
7981 for(j=0; j<16; j++){
7982 int diff= FFABS(src[j] - ref[j]);
7985 max_error= FFMAX(max_error, diff);
7988 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7989 printf("testing quantizer\n");
7990 for(qp=0; qp<52; qp++){
7992 src1_block[i]= src2_block[i]= random()%255;
// --- NAL escaping (emulation-prevention) round-trip ---
7995 printf("Testing NAL layer\n");
7997 uint8_t bitstream[COUNT];
7998 uint8_t nal[COUNT*2];
8000 memset(&h, 0, sizeof(H264Context));
8002 for(i=0; i<COUNT; i++){
8010 for(j=0; j<COUNT; j++){
8011 bitstream[j]= (random() % 255) + 1;
// Sprinkle zero bytes to provoke 00 00 0x escape sequences.
8014 for(j=0; j<zeros; j++){
8015 int pos= random() % COUNT;
8016 while(bitstream[pos] == 0){
8025 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8027 printf("encoding failed\n");
8031 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8035 if(out_length != COUNT){
8036 printf("incorrect length %d %d\n", out_length, COUNT);
8040 if(consumed != nal_length){
8041 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8045 if(memcmp(bitstream, out, COUNT)){
8046 printf("mismatch\n");
8052 printf("Testing RBSP\n");
8060 static av_cold int decode_end(AVCodecContext *avctx)
8062 H264Context *h = avctx->priv_data;
8063 MpegEncContext *s = &h->s;
8066 av_freep(&h->rbsp_buffer[0]);
8067 av_freep(&h->rbsp_buffer[1]);
8068 free_tables(h); //FIXME cleanup init stuff perhaps
8070 for(i = 0; i < MAX_SPS_COUNT; i++)
8071 av_freep(h->sps_buffers + i);
8073 for(i = 0; i < MAX_PPS_COUNT; i++)
8074 av_freep(h->pps_buffers + i);
8078 // memset(h, 0, sizeof(H264Context));
// Registration entry for the software H.264 decoder.
// NOTE(review): lossy extraction — the name/type/id/init/close/decode
// member lines and the closing `};` were dropped; surviving lines kept
// byte-identical.
8084 AVCodec h264_decoder = {
8088 sizeof(H264Context),
// DRAW_HORIZ_BAND deliberately disabled; DELAY because frames are reordered.
8093 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8095 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
// Registration entry for the VDPAU hardware-accelerated variant, compiled
// only when CONFIG_H264_VDPAU_DECODER is set.
// NOTE(review): lossy extraction — member lines and closing `};`/`#endif`
// were dropped; surviving lines kept byte-identical.
8098 #if CONFIG_H264_VDPAU_DECODER
8099 AVCodec h264_vdpau_decoder = {
8103 sizeof(H264Context),
8108 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8110 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8114 #if CONFIG_SVQ3_DECODER