2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* fallback so this block is self-contained if attributes.h was not included;
 * the real definition normally comes from libavutil/attributes.h */
#ifndef av_always_inline
#define av_always_inline inline
#endif

/**
 * Packs two 16-bit values into a single 32-bit word, ordered so that the
 * pair can be stored to memory with one 32-bit write regardless of the
 * host byte order.
 *
 * Fix: the original shifted a plain int left by 16; for negative inputs
 * (e.g. PART_NOT_AVAILABLE reference indexes) or values >= 0x8000 that is
 * undefined behaviour in C. Casting to uint32_t first makes the shift
 * well defined while producing the same bit pattern.
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
100 static const uint8_t left_block_options[4][8]={
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * fill_caches: gather everything the decode/deblock of the current
 * macroblock needs from its already-decoded neighbours (intra prediction
 * modes, non-zero coefficient counts, cbp, mv/ref/mvd caches, direct-mode
 * flags) into small per-macroblock caches addressed via scan8[].
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when called for the loop filter, which uses
 *                    relaxed availability rules and needs fewer caches
 * NOTE(review): this excerpt is missing lines (closing braces and some
 * branches); the comments below describe only what is visible here.
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
/* address of the macroblock above; the stride step is doubled for field pictures */
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
/* deblock fast path: same slice (or single slice) and no MBAFF */
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
/* non-MBAFF neighbour addresses: simple 2D grid arithmetic */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
/* NOTE(review): MBAFF neighbour derivation follows; the enclosing
 * if(FRAME_MBAFF){ line is elided from this excerpt. Neighbours are
 * computed per macroblock *pair* and adjusted by field/frame coding. */
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
/* left neighbour differs in field/frame coding: pick left blocks from
 * the pair and select a left_block_options remap table */
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
164 left_block= left_block_options[2 - bottom];
/* publish neighbour addresses for later stages (deblocking etc.) */
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
/* deblock path: any already-decoded MB (slice_table < 0xFFFF) counts as available */
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* decode path: neighbours are usable only when in the same slice */
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra sample availability bitmasks; with constrained_intra_pred only
 * intra neighbours provide samples (type_mask selects the intra bits) */
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
/* left availability is more involved when the left MB pair is coded in
 * the opposite field/frame mode (MBAFF) */
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
/* cache intra4x4 prediction modes of the top / left neighbours; a
 * negative cached mode marks "not available" for pred_intra_mode() */
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
254 if(!(top_type & type_mask))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!(left_type[i] & type_mask))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* non-zero coefficient count cache: top row (luma + chroma DC rows) */
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
/* no usable top neighbour: CABAC inter predicts nnz as 0, everything
 * else uses the out-of-band marker 64 */
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* left column of the nnz cache, remapped through left_block[] for MBAFF */
319 for (i=0; i<2; i++) {
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* cache neighbouring coded block patterns (CABAC context derivation) */
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* inter / direct: fill the motion vector and reference index caches */
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h->mv_cache_clean[list]= 0;
/* top row of the mv/ref caches (4 mvs, 2 refs from the bottom edge of
 * the macroblock above) */
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* left column: two mv/ref pairs per left macroblock, via left_block[] */
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* corners are only needed for full MV prediction, not deblock/direct */
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC only: motion vector difference cache for context derivation */
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache direct-mode flags of the top/left neighbours */
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: MAP_F2F rescales the cached refs/mvs of each neighbour between
 * frame and field coordinates (applied via the MAP_MVS expansion) */
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame neighbour of a field MB: double refs, halve vertical mvs */
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
/* field neighbour of a frame MB: halve refs, double vertical mvs */
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
/* count of neighbours coded with the 8x8 transform */
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates the cached intra4x4 prediction modes against the sample
 * availability bitmasks: modes that need unavailable top/left samples are
 * remapped to an allowed DC variant, or rejected with an error.
 * NOTE(review): loop headers and the error-return statements are elided
 * from this excerpt.
 */
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
/* remap tables indexed by prediction mode: value >= 0 replaces the mode,
 * -1 means the mode is usable without the missing neighbour;
 * NOTE(review): a missing table entry presumably signals an error — the
 * branch that checks `status` is elided here */
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* bit 0x8000 = top-left 4x4 block's top samples */
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* each nibble of 0x8888 tracks one row of left samples */
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validates a 16x16/chroma intra prediction mode against the sample
 * availability masks, remapping DC variants when top/left samples are
 * missing. Returns the (possibly remapped) mode; error paths (elided in
 * this excerpt) presumably return a negative value — TODO confirm.
 */
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
/* remap tables indexed by mode: value >= 0 replaces the mode, -1 keeps it */
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
613 if(!(h->top_samples_available&0x8000)){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* 0x8080 covers both left halves (MBAFF can make only one available) */
621 if((h->left_samples_available&0x8080) != 0x8080){
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "diagonal" (top-right, or top-left as fallback) motion
 * vector neighbour C for MV prediction and returns its reference index.
 * Most of the body handles MBAFF, where neighbouring field/frame blocks
 * need their mvs/refs rescaled on the fly (via SET_DIAG_MV).
 * NOTE(review): several condition lines and closing braces are elided
 * from this excerpt.
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* scan8[0]-2 is a scratch cache slot used to return a rescaled mv */
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: load mv/ref at 4x4 position (x4,y4), scale the vertical
 * component with MV_OP and the ref index with REF_OP, and return */
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* top-right unavailable: fall back to the left neighbour's field data */
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* non-MBAFF path: use cached top-right, else the top-left neighbour */
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
/**
 * Median motion vector prediction (H.264 clause 8.4.1.3): predicts the MV
 * of block n from neighbours A (left), B (top) and C (diagonal).
 * @param n          the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param ref        the reference index being predicted for
 * @param mx/my      output: predicted motion vector components
 * NOTE(review): the single-match copy branches and some closing braces
 * are elided from this excerpt.
 */
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* how many neighbours use the same reference picture as this block */
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
/* exactly one matching neighbour: take its MV directly (copy branches elided) */
776 }else if(match_count==1){
780 }else if(top_ref==ref){
/* no match: median of all three, unless only the left is available */
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 16x8 partitions: the top partition
 * prefers the top neighbour B, the bottom partition the left neighbour A
 * (when they use the same reference); otherwise falls back to the
 * generic median predictor. The partition-selecting conditions and copy
 * statements are elided from this excerpt.
 * @param n  the block index
 * @param mx/my output: predicted motion vector components
 */
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: generic median prediction */
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
/**
 * Directional MV prediction for 8x16 partitions: the left partition
 * prefers the left neighbour A, the right partition the diagonal
 * neighbour C (when they use the same reference); otherwise falls back to
 * the generic median predictor. The partition-selecting conditions and
 * copy statements are elided from this excerpt.
 * @param n  the block index
 * @param mx/my output: predicted motion vector components
 */
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
/* fallback: generic median prediction */
869 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * MV prediction for P-skip macroblocks: the predicted MV is zero when a
 * 16x16 neighbour is unavailable or has a zero MV with ref 0 (the
 * zero-assignment branch is elided from this excerpt); otherwise the
 * generic median predictor with ref 0 is used.
 * @param mx/my output: predicted motion vector components
 */
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
/* !(ref | mv) is true exactly when ref==0 and both mv components are 0 */
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
886 pred_motion(h, 0, 4, 0, 0, mx, my);
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Precomputes the temporal-direct distance scale factors for every list-0
 * reference, plus per-field tables for MBAFF. The FRAME_MBAFF branch
 * structure is partly elided from this excerpt.
 */
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
/* MBAFF: one table per field, using the field pocs and field references
 * (offset +16 into the ref list) */
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
/* frame-level table */
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds the mapping from the co-located picture's reference indexes to
 * the current slice's list-0 indexes (used by temporal direct mode).
 * References are matched via their poc encoding
 * (4*frame_num + reference bits).
 * @param field    current field parity (when mbafi)
 * @param colfield field parity of the co-located picture
 * @param mbafi    nonzero for the MBAFF per-field variant (entries 16..)
 * NOTE(review): some loop braces and the frame-poc adjustment branch are
 * elided from this excerpt.
 */
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
/* frame references in a field context: retarget to the wanted field */
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
/* find the current-list reference with the same poc encoding */
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
945 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and
 * frame_num/parity keys) into the Picture struct so future B slices can
 * use them as the colocated side, then builds the col->list0 maps for
 * temporal direct prediction via fill_colmap().
 * NOTE(review): elided lines hide the declarations of list/field/j and
 * the early return for non-temporal-direct slices.
 */
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
/* sidx selects which field slot of cur to fill; ^1 maps
 * picture_structure/reference parity bits to the array index */
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
/* a frame picture fills both field slots identically */
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
/* only B slices with temporal direct need the colocated maps */
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B-direct
 * macroblock (or the direct 8x8 partitions of a B_8x8 MB), writing the
 * results into h->mv_cache / h->ref_cache / h->sub_mb_type and
 * adjusting *mb_type. Handles both spatial (h->direct_spatial_mv_pred)
 * and temporal direct prediction, including the frame/field (PAFF and
 * MBAFF) colocated-macroblock selection cases.
 * NOTE(review): this listing is heavily elided — many declarations
 * (mb_type_col, ref, mv, i8, scale, ref_offset, ...), closing braces
 * and some branches are not visible; comments below describe only the
 * visible code.
 */
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- select the colocated macroblock in ref_list[1][0], depending on
 *     whether current and colocated pictures are frame or field coded --- */
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
/* current MB is frame coded, colocated is field coded: pick the
 * colocated field whose POC is closest to the current picture */
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
/* field-to-field with opposite parity: step to the other field row */
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
/* current field MB maps onto a vertical pair of colocated frame MBs */
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* --- fetch the colocated picture's L0/L1 motion vectors and ref indices --- */
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* step down one 8-pel row inside the colocated MB pair (elided condition) */
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
/* ================= spatial direct prediction ================= */
1061 if(h->direct_spatial_mv_pred){
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes PART_NOT_AVAILABLE (negative) sort last */
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* no usable neighbor reference in either list: zero everything */
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1085 for(list=0; list<2; list++){
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1089 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction direction from the MB/sub-MB types */
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
/* spatial direct, frame/field mismatch with colocated: per-8x8 blocks */
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated is a "moving-slowly" block (|mv| <= 1, ref 0): force zero MVs
 * per the spatial direct rules; otherwise keep the predicted MVs */
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1121 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* spatial direct, whole-MB 16x16 case */
1131 }else if(IS_16X16(*mb_type)){
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 had a bug here; work around unless unknown encoder */
1139 && (h->x264_build>33 || !h->x264_build)))){
1141 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* spatial direct, per-8x8 (and per-4x4 inside) case */
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ================= temporal direct prediction ================= */
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
/* MBAFF field MB: switch to the per-field maps/scale factors */
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* temporal direct, frame/field mismatch: per-8x8 with vertical MV scaling */
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map the colocated reference (L0, else L1) to our list-0 index */
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* scale the vertical component for the frame<->field geometry change */
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
/* mvL0 = scaled colocated MV; mvL1 = mvL0 - mvCol (spec 8.4.1.2.3) */
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* temporal direct, per-8x8 (and per-4x4) case */
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1290 ref0 = map_col_to_list0[0][ref0];
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion data from the decode caches (mv_cache,
 * ref_cache, mvd_cache, sub_mb_type) back into the picture-wide arrays
 * (current_picture.motion_val / ref_index, h->mvd_table,
 * h->direct_table) at this macroblock's position.
 * NOTE(review): elided lines hide the y-loop headers, some else
 * branches and the declarations of list/y.
 */
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
/* base offsets of this MB in the 4x4 (b_xy) and 8x8 (b8_xy) grids */
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1329 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row out of the cache */
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC needs the motion vector differences preserved for context */
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* B-slice CABAC: record which 8x8 partitions used direct prediction */
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Extracts the RBSP from a NAL unit: parses the NAL header
 * (nal_ref_idc, nal_unit_type) and removes the 00 00 03 emulation
 * prevention bytes, returning a pointer to the unescaped payload.
 * @param dst_length set to the unescaped payload length
 * @param consumed   set to the number of input bytes used (incl. header)
 * @return pointer to the RBSP (either src itself when no escapes were
 *         found, or an internal rbsp_buffer); NOTE(review): the NULL /
 *         error paths are elided from this listing.
 */
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
/* --- fast scan for a 00 00 (potential escape/startcode) --- */
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
/* SWAR test: any zero byte in an 8-byte word? step 9 = 8 + 1 overlap */
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1391 if(i>0 && !src[i]) i--;
/* byte-wise fallback scan */
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1401 /* startcode, so we must be past the end */
/* no escape sequence found: payload can be used in place */
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
/* separate escape buffer for partition C so partitions A/B stay valid */
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1423 //printf("decoding esc\n");
/* copy the escape-free prefix verbatim */
1424 memcpy(dst, src, i);
1427 //remove escapes (very rare 1:2^22)
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1437 }else //next start code
1441 dst[di++]= src[si++];
1444 dst[di++]= src[si++];
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
/**
 * Locates the rbsp_stop_one_bit to find the exact end of the bitstream.
 * NOTE(review): the body (declaration of v and the bit-scanning loop)
 * is elided from this listing; only the trace print is visible.
 */
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform + dequant of the 16 luma DC
 * coefficients, in place. The DC values live at the (0,0) positions of
 * the 16 4x4 blocks inside the 16x16 `block` array, hence the
 * stride-based x_offset/y_offset addressing.
 * NOTE(review): elided lines hide the loop headers, the z0..z3 -> temp
 * stores of the first pass, and some declarations (stride, i).
 */
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 //memset(block, 64, 2*256);
/* pass 1: butterflies over columns of DC values into temp[] */
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: butterflies over rows, then dequantize with rounding */
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values, in place
 * (encoder counterpart of h264_luma_dc_dequant_idct_c; result is
 * halved by the final >>1).
 * NOTE(review): elided lines hide the loop headers, the temp[] stores
 * of the first pass, and some declarations (stride, i).
 */
1513 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1516 int temp[16]; //FIXME check if this is a good idea
1517 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1518 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* pass 1: column butterflies into temp[] */
1521 const int offset= y_offset[i];
1522 const int z0= block[offset+stride*0] + block[offset+stride*4];
1523 const int z1= block[offset+stride*0] - block[offset+stride*4];
1524 const int z2= block[offset+stride*1] - block[offset+stride*5];
1525 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: row butterflies, halved on output */
1534 const int offset= x_offset[i];
1535 const int z0= temp[4*0+i] + temp[4*2+i];
1536 const int z1= temp[4*0+i] - temp[4*2+i];
1537 const int z2= temp[4*1+i] - temp[4*3+i];
1538 const int z3= temp[4*1+i] + temp[4*3+i];
1540 block[stride*0 +offset]= (z0 + z3)>>1;
1541 block[stride*2 +offset]= (z1 + z2)>>1;
1542 block[stride*8 +offset]= (z1 - z2)>>1;
1543 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequant of the 4 chroma DC coefficients, in
 * place. The DCs sit at the (0,0) positions of the four 4x4 chroma
 * blocks, hence stride=32/xStride=16 addressing.
 * NOTE(review): elided lines hide the declarations of a..f and the
 * computation of e/f (presumably e=a-b, ... of the 2x2 butterfly).
 */
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
/* 2x2 butterfly output, dequantized; qmul already includes the QP scale */
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the 4 chroma DC values, in place (encoder
 * counterpart of chroma_dc_dequant_idct_c; no quantization here).
 * NOTE(review): elided lines hide the declarations of a..f and the
 * intermediate e (and possibly f) computation.
 */
1573 static void chroma_dc_dct_c(DCTELEM *block){
1574 const int stride= 16*2;
1575 const int xStride= 16;
1578 a= block[stride*0 + xStride*0];
1579 b= block[stride*0 + xStride*1];
1580 c= block[stride*1 + xStride*0];
1581 d= block[stride*1 + xStride*1];
1588 block[stride*0 + xStride*0]= (a+c);
1589 block[stride*0 + xStride*1]= (e+b);
1590 block[stride*1 + xStride*0]= (a-c);
1591 block[stride*1 + xStride*1]= (e-b);
1596 * gets the chroma qp.
/**
 * Maps the luma qscale to a chroma QP via the PPS lookup table.
 * @param t selects one of the two PPS chroma QP tables
 *          (presumably Cb vs Cr offsets — confirm against pps fields)
 */
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition from one reference picture for one
 * prediction direction (list): quarter-pel luma via qpix_op, eighth-pel
 * chroma via chroma_op, with edge emulation when the MV points outside
 * the picture.
 * @param square nonzero when the partition is square (one luma op);
 *        otherwise a second op is applied at +delta
 * NOTE(review): elided lines hide the emu flag declaration/uses and
 * some conditionals around the chroma edge-emulation calls.
 */
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
/* MVs are quarter-pel; offsets are in 8-pel units scaled by 8 */
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel interpolation reads 3 extra pels on each side */
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: predicts from
 * list 0 with the "put" ops, then — for bi-prediction — averages in the
 * list-1 prediction by switching to the "avg" ops.
 * NOTE(review): elided lines hide the if(list0)/if(list1) guards and
 * the qpix_op reassignment to qpix_avg.
 */
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition; luma offsets are 2x chroma */
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
/* second direction averages into the first prediction */
1683 chroma_op= chroma_avg;
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition.
 * Bi-directional case: both predictions are produced (list 1 into a
 * scratch buffer), then blended — with implicit weights when
 * h->use_weight==2, or with explicit per-ref weights/offsets otherwise.
 * Uni-directional case: a single prediction followed by an in-place
 * weight op.
 * NOTE(review): elided lines hide the list0&&list1 / else branch
 * headers and some closing braces.
 */
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list 0 straight to dest, list 1 to scratch; blended below */
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights sum to 64, denominator 2^5 */
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-reference weights and summed offsets */
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict then weight in place */
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * the weighted path is taken for explicit weighting (use_weight==1) or
 * for implicit bi-prediction whose weight pair is not the trivial
 * 32/32 average (which plain averaging already computes).
 */
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position
 * this MB's first MV points to, offset ahead of the current MB, so the
 * data is resident by the time motion compensation runs.
 * NOTE(review): an elided guard (presumably refn >= 0 / CONFIG check)
 * sits between the refn declaration and the prefetch body.
 */
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* one chroma prefetch covers both planes via the cb->cr plane distance */
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Top-level inter prediction for one macroblock: walks the partition
 * tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions down to 4x4) and
 * calls mc_part() for each partition with the function pointers of the
 * matching block size. The indices into qpix_*/chroma_*/weight_* select
 * the 16/8/4-wide variants.
 * NOTE(review): elided lines hide the 8x8 sub-partition loop headers
 * (i, n, j) and some closing braces.
 */
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
/* start fetching list-0 reference pixels before any compute */
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 macroblock: each 8x8 block carries its own sub_mb_type */
1833 assert(IS_8X8(mb_type));
1836 const int sub_mb_type= h->sub_mb_type[i];
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 assert(IS_SUB_4X4(sub_mb_type));
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* prefetch list-1 references for the bi-predicted partitions just done */
1879 prefetch_motion(h, 1);
/**
 * Precomputes the CAVLC level decoding table: for every suffix length
 * (0..6) and every LEVEL_TAB_BITS-bit bitstream prefix, stores the
 * decoded signed level and the number of bits consumed. Entries whose
 * full code does not fit in LEVEL_TAB_BITS store an escape marker
 * (value >= 100) so the slow path takes over.
 */
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* number of leading zeros before the marker bit in this pattern */
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map unsigned level_code to signed level: even -> +, odd -> - */
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* code longer than the table: record prefix, defer to slow path */
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run_before and their chroma-DC variants),
 * using the packed static table storage declared at file scope.
 * All tables use INIT_VLC_USE_NEW_STATIC, i.e. the VLC code points into
 * the preallocated static arrays instead of heap memory. */
1907 static av_cold void decode_init_vlc(void){
1908     static int done = 0;
     /* chroma DC coeff_token: 4*5 symbols, dedicated 256-entry table */
1915         chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916         chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918                  &chroma_dc_coeff_token_len [0], 1, 1,
1919                  &chroma_dc_coeff_token_bits[0], 1, 1,
1920                  INIT_VLC_USE_NEW_STATIC);
     /* luma coeff_token: four context tables packed back-to-back at 'offset'
      * into coeff_token_vlc_tables (loop header not visible in this chunk) */
1924             coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925             coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927                      &coeff_token_len [i][0], 1, 1,
1928                      &coeff_token_bits[i][0], 1, 1,
1929                      INIT_VLC_USE_NEW_STATIC);
1930             offset += coeff_token_vlc_tables_size[i];
1933          * This is a one time safety check to make sure that
1934          * the packed static coeff_token_vlc table sizes
1935          * were initialized correctly.
1937         assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
     /* chroma DC total_zeros: 3 tables of 8 entries each */
1940             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941             chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942             init_vlc(&chroma_dc_total_zeros_vlc[i],
1943                      CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1945                      &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946                      INIT_VLC_USE_NEW_STATIC);
     /* luma total_zeros: one table per TotalCoeff context (1..15) */
1948         for(i=0; i<15; i++){
1949             total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950             total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951             init_vlc(&total_zeros_vlc[i],
1952                      TOTAL_ZEROS_VLC_BITS, 16,
1953                      &total_zeros_len [i][0], 1, 1,
1954                      &total_zeros_bits[i][0], 1, 1,
1955                      INIT_VLC_USE_NEW_STATIC);
     /* run_before tables for zerosLeft 1..6 (loop header not visible here) */
1959             run_vlc[i].table = run_vlc_tables[i];
1960             run_vlc[i].table_allocated = run_vlc_tables_size;
1961             init_vlc(&run_vlc[i],
1963                      &run_len [i][0], 1, 1,
1964                      &run_bits[i][0], 1, 1,
1965                      INIT_VLC_USE_NEW_STATIC);
     /* run_before for zerosLeft > 6 uses a single dedicated table */
1967         run7_vlc.table = run7_vlc_table,
1968         run7_vlc.table_allocated = run7_vlc_table_size;
1969         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970                  &run_len [6][0], 1, 1,
1971                  &run_bits[6][0], 1, 1,
1972                  INIT_VLC_USE_NEW_STATIC);
1974         init_cavlc_level_tab();
/* Release all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread top border buffers and scratchpads.
 * av_freep() NULLs the pointers, so a subsequent re-allocation is safe. */
1978 static void free_tables(H264Context *h){
1981     av_freep(&h->intra4x4_pred_mode);
1982     av_freep(&h->chroma_pred_mode_table);
1983     av_freep(&h->cbp_table);
1984     av_freep(&h->mvd_table[0]);
1985     av_freep(&h->mvd_table[1]);
1986     av_freep(&h->direct_table);
1987     av_freep(&h->non_zero_count);
1988     av_freep(&h->slice_table_base);
     /* slice_table points into slice_table_base; clear it after the base is freed */
1989     h->slice_table= NULL;
1991     av_freep(&h->mb2b_xy);
1992     av_freep(&h->mb2b8_xy);
     /* per-thread buffers (thread context may alias h itself for thread 0) */
1994     for(i = 0; i < MAX_THREADS; i++) {
1995         hx = h->thread_context[i];
1997         av_freep(&hx->top_borders[1]);
1998         av_freep(&hx->top_borders[0]);
1999         av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices. If both 8x8 scaling matrices are identical, table 1
 * simply aliases table 0. Coefficients are stored transposed when the
 * active IDCT is not the C reference one (idct permutation). */
2003 static void init_dequant8_coeff_table(H264Context *h){
2005     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009     for(i=0; i<2; i++ ){
         /* share buffer 0 when the two scaling matrices match */
2010         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2015         for(q=0; q<52; q++){
2016             int shift = div6[q];
             /* per-coefficient: base init value * scaling matrix entry, shifted by QP/6 */
2019                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021                      h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (6 matrices: intra/inter Y, Cb, Cr)
 * for all 52 QP values from the PPS scaling matrices, sharing buffers
 * between matrices that are identical. Stored transposed when the active
 * IDCT differs from the C reference implementation. */
2026 static void init_dequant4_coeff_table(H264Context *h){
2028     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029     for(i=0; i<6; i++ ){
2030         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         /* reuse an earlier buffer j if its scaling matrix equals matrix i
          * (inner loop header over j not visible in this chunk) */
2032             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2040         for(q=0; q<52; q++){
2041             int shift = div6[q] + 2;
             /* per-coefficient value: init table entry * scaling matrix, shifted by QP/6+2 */
2044                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046                      h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass, QP 0 entries are forced to the identity scale (1<<6)
 * so dequantization becomes a no-op for bypassed blocks. */
2051 static void init_dequant_tables(H264Context *h){
2053     init_dequant4_coeff_table(h);
2054     if(h->pps.transform_8x8_mode)
2055         init_dequant8_coeff_table(h);
2056     if(h->sps.transform_bypass){
         /* identity scaling for QP==0 (loop headers not visible in this chunk) */
2059                 h->dequant4_coeff[i][0][x] = 1<<6;
2060         if(h->pps.transform_8x8_mode)
2063                     h->dequant8_coeff[i][0][x] = 1<<6;
2070  * needs width/height
2072 static int alloc_tables(H264Context *h){
2073     MpegEncContext * const s = &h->s;
     /* one extra macroblock row as guard band above the picture */
2074     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2077     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2079     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2080     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2081     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     /* slice_table entries of -1 mark "no slice"; offset the working pointer
      * past the guard rows so out-of-picture neighbours read -1 */
2088     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2089     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
     /* macroblock-index -> 4x4-block / 8x8-block index mapping tables */
2091     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2092     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2093     for(y=0; y<s->mb_height; y++){
2094         for(x=0; x<s->mb_width; x++){
2095             const int mb_xy= x + y*s->mb_stride;
2096             const int b_xy = 4*x + 4*y*h->b_stride;
2097             const int b8_xy= 2*x + 2*y*h->b8_stride;
2099             h->mb2b_xy [mb_xy]= b_xy;
2100             h->mb2b8_xy[mb_xy]= b8_xy;
     /* scratchpad is allocated later in frame_start() once linesize is known */
2104     s->obmc_scratchpad = NULL;
2106     if(!h->dequant4_coeff[0])
2107         init_dequant_tables(h);
2116  * Mimic alloc_tables(), but for every context thread.
2118 static void clone_tables(H264Context *dst, H264Context *src){
     /* share the big per-picture tables between thread contexts;
      * they are owned (allocated/freed) by the source context */
2119     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2120     dst->non_zero_count           = src->non_zero_count;
2121     dst->slice_table              = src->slice_table;
2122     dst->cbp_table                = src->cbp_table;
2123     dst->mb2b_xy                  = src->mb2b_xy;
2124     dst->mb2b8_xy                 = src->mb2b8_xy;
2125     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2126     dst->mvd_table[0]             = src->mvd_table[0];
2127     dst->mvd_table[1]             = src->mvd_table[1];
2128     dst->direct_table             = src->direct_table;
     /* each thread gets its own scratchpad, allocated lazily in frame_start() */
2130     dst->s.obmc_scratchpad = NULL;
2131     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2136  * Allocate buffers which are not shared amongst multiple threads.
2138 static int context_init(H264Context *h){
     /* per-thread saved top row: 16 luma + 8+8 chroma bytes per macroblock column */
2139     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2144     return -1; // free_tables will clean up for us
/* Initialization shared by decoder (and historically encoder) setup:
 * picture dimensions, prediction function tables, DSP init, and default
 * flat (all-16) scaling matrices until a PPS overrides them. */
2147 static av_cold void common_init(H264Context *h){
2148     MpegEncContext * const s = &h->s;
2150     s->width = s->avctx->width;
2151     s->height = s->avctx->height;
2152     s->codec_id= s->avctx->codec->id;
2154     ff_h264_pred_init(&h->hpc, s->codec_id);
     /* -1 = dequant tables not yet built for any PPS */
2156     h->dequant_coeff_pps= -1;
2157     s->unrestricted_mv=1;
2158     s->decode=1; //FIXME
2160     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
     /* default flat scaling lists (value 16 == unity scale) */
2162     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2167  * Reset SEI values at the beginning of the frame.
2169  * @param h H.264 context.
2171 static void reset_sei(H264Context *h) {
     /* -1 marks "not present in the bitstream" for these SEI fields */
2172     h->sei_recovery_frame_cnt       = -1;
2173     h->sei_dpb_output_delay         =  0;
2174     h->sei_cpb_removal_delay        = -1;
2175     h->sei_buffering_period_present =  0;
/* AVCodec init callback: set up the MpegEncContext defaults, pick the output
 * pixel format (VDPAU hardware format when the codec advertises it), parse
 * avcC extradata if present, and normalize the time base for field pictures. */
2178 static av_cold int decode_init(AVCodecContext *avctx){
2179     H264Context *h= avctx->priv_data;
2180     MpegEncContext * const s = &h->s;
2182     MPV_decode_defaults(s);
2187     s->out_format = FMT_H264;
2188     s->workaround_bugs= avctx->workaround_bugs;
2191 //    s->decode_mb= ff_h263_decode_mb;
2192     s->quarter_sample = 1;
2193     if(!avctx->has_b_frames)
2196     if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2197         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
     /* software path: negotiate pix_fmt and look for a hwaccel backend */
2199         avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2200     avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
     /* leading byte 1 identifies avcC-style (length-prefixed) extradata */
2204     if(avctx->extradata_size > 0 && avctx->extradata &&
2205        *(char *)avctx->extradata == 1){
2212     h->thread_context[0] = h;
2213     h->outputed_poc = INT_MIN;
2214     h->prev_poc_msb= 1<<16;
     /* H.264 time base counts fields: double den once and mark 2 ticks/frame */
2216     if(avctx->codec_id == CODEC_ID_H264){
2217         if(avctx->ticks_per_frame == 1){
2218             s->avctx->time_base.den *=2;
2220         avctx->ticks_per_frame = 2;
/* Per-frame setup: start the MPV frame and error resilience, precompute
 * block pixel offsets for frame and field (MBAFF) layouts, allocate the
 * per-thread bipred scratchpad, and reset slice/POC bookkeeping. */
2225 static int frame_start(H264Context *h){
2226     MpegEncContext * const s = &h->s;
2229     if(MPV_frame_start(s, s->avctx) < 0)
2231     ff_er_frame_start(s);
2233      * MPV_frame_start uses pict_type to derive key_frame.
2234      * This is incorrect for H.264; IDR markings must be used.
2235      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2236      * See decode_nal_units().
2238     s->current_picture_ptr->key_frame= 0;
2240     assert(s->linesize && s->uvlinesize);
     /* block_offset[0..23]: frame-coded layout; [24..47]: field (doubled stride) */
2242     for(i=0; i<16; i++){
2243         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2244         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     /* chroma offsets for both planes (loop header not visible in this chunk) */
2247         h->block_offset[16+i]=
2248         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249         h->block_offset[24+16+i]=
2250         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2253     /* can't be in alloc_tables because linesize isn't known there.
2254      * FIXME: redo bipred weight to not require extra buffer? */
2255     for(i = 0; i < s->avctx->thread_count; i++)
2256         if(!h->thread_context[i]->s.obmc_scratchpad)
2257             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2259     /* some macroblocks will be accessed before they're available */
2260     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2261         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2263 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2265     // We mark the current picture as non-reference after allocating it, so
2266     // that if we break out due to an error it can be released automatically
2267     // in the next MPV_frame_start().
2268     // SVQ3 as well as most other codecs have only last/next/current and thus
2269     // get released even with set reference, besides SVQ3 and others do not
2270     // mark frames as reference later "naturally".
2271     if(s->codec_id != CODEC_ID_SVQ3)
2272         s->current_picture_ptr->reference= 0;
     /* POCs are filled in later per field; INT_MAX means "not yet decoded" */
2274     s->current_picture_ptr->field_poc[0]=
2275     s->current_picture_ptr->field_poc[1]= INT_MAX;
2276     assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row and right-edge columns of the just-decoded macroblock
 * into top_borders[]/left_border so the deblocking filter and the next MB row
 * can read pre-filter pixels. The MBAFF path keeps two saved lines (one per
 * macroblock of a vertical pair). */
2281 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2282     MpegEncContext * const s = &h->s;
2291         src_cb -= uvlinesize;
2292         src_cr -= uvlinesize;
2294     if(!simple && FRAME_MBAFF){
         /* MBAFF: bottom MB of a pair writes slot 17/9, top MB slot 1 */
2296             offset = MB_MBAFF ? 1 : 17;
2297             uvoffset= MB_MBAFF ? 1 : 9;
         /* save the last line of the MB pair (luma row 15, chroma row 7) */
2299                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2300                 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2301                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2302                     *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2303                     *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2308             h->left_border[0]= h->top_borders[0][s->mb_x][15];
2309             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2310                 h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2311                 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
     /* non-MBAFF / field selection of which saved line and column stride to use */
2317         top_idx = MB_MBAFF ? 0 : 1;
2319     step= MB_MBAFF ? 2 : 1;
2322     // There are two lines saved, the line above the the top macroblock of a pair,
2323     // and the line above the bottom macroblock
2324     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2325     for(i=1; i<17 - skiplast; i++){
2326         h->left_border[offset+i*step]= src_y[15+i*  linesize];
     /* save bottom luma row for the MB below (row 16 relative to src_y) */
2329     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2330     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2332     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2333         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2334         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2335         for(i=1; i<9 - skiplast; i++){
2336             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2337             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2339         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2340         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg!=0) or copy the saved border pixels with the macroblock edges so
 * intra prediction sees unfiltered neighbours while the picture keeps the
 * deblocked pixels. Called before (xchg=1) and after (xchg=0) intra prediction
 * when the deblocking filter is active. */
2344 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2345     MpegEncContext * const s = &h->s;
2356     if(!simple && FRAME_MBAFF){
2358             offset = MB_MBAFF ? 1 : 17;
2359             uvoffset= MB_MBAFF ? 1 : 9;
2363         top_idx = MB_MBAFF ? 0 : 1;
2365     step= MB_MBAFF ? 2 : 1;
     /* deblocking_filter==2: filter only inside the current slice */
2368     if(h->deblocking_filter == 2) {
2370         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2371         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2373         deblock_left = (s->mb_x > 0);
2374         deblock_top =  (s->mb_y > !!MB_FIELD);
     /* step back one row/column so index 0 addresses the border pixels */
2377     src_y  -=   linesize + 1;
2378     src_cb -= uvlinesize + 1;
2379     src_cr -= uvlinesize + 1;
2381 #define XCHG(a,b,t,xchg)\
2388         for(i = !deblock_top; i<16; i++){
2389             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2391         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
     /* top luma row: first 8 bytes conditionally, the rest always swapped */
2395         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2396         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2397         if(s->mb_x+1 < s->mb_width){
2398             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2402     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2404             for(i = !deblock_top; i<8; i++){
2405                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2406                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2408             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2409             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2412             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2413             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Decode one macroblock into the current picture: intra prediction or motion
 * compensation, inverse transform + residual add for luma and chroma, then
 * border backup and in-loop deblocking.
 * 'simple' is a compile-time flag: the simple variant omits MBAFF, PCM,
 * grayscale and SVQ3 handling so the common path stays branch-light.
 * NOTE(review): many interior lines (else branches, loop headers, closing
 * braces) are not visible in this chunk. */
2418 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2419     MpegEncContext * const s = &h->s;
2420     const int mb_x= s->mb_x;
2421     const int mb_y= s->mb_y;
2422     const int mb_xy= h->mb_xy;
2423     const int mb_type= s->current_picture.mb_type[mb_xy];
2424     uint8_t  *dest_y, *dest_cb, *dest_cr;
2425     int linesize, uvlinesize /*dct_offset*/;
2427     int *block_offset = &h->block_offset[0];
2428     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2429     /* is_h264 should always be true if SVQ3 is disabled. */
2430     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2431     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2432     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2434     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2435     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2436     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
     /* prefetch destination rows ahead of the writes below */
2438     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2439     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     /* field macroblock: double strides and use the field block_offset half */
2441     if (!simple && MB_FIELD) {
2442         linesize   = h->mb_linesize   = s->linesize * 2;
2443         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2444         block_offset = &h->block_offset[24];
2445         if(mb_y&1){ //FIXME move out of this function?
2446             dest_y -= s->linesize*15;
2447             dest_cb-= s->uvlinesize*7;
2448             dest_cr-= s->uvlinesize*7;
         /* MBAFF: fold field parity into the cached reference indices */
2452             for(list=0; list<h->list_count; list++){
2453                 if(!USES_LIST(mb_type, list))
2455                 if(IS_16X16(mb_type)){
2456                     int8_t *ref = &h->ref_cache[list][scan8[0]];
2457                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2459                     for(i=0; i<16; i+=4){
2460                         int ref = h->ref_cache[list][scan8[i]];
2462                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
     /* frame macroblock: plain strides (else branch of the MB_FIELD test) */
2468         linesize   = h->mb_linesize   = s->linesize;
2469         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2470 //        dct_offset = s->linesize * 16;
     /* IPCM: raw samples were stored in h->mb by the entropy decoder */
2473     if (!simple && IS_INTRA_PCM(mb_type)) {
2474         for (i=0; i<16; i++) {
2475             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2477         for (i=0; i<8; i++) {
2478             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2479             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2482         if(IS_INTRA(mb_type)){
             /* restore unfiltered borders for intra prediction */
2483             if(h->deblocking_filter)
2484                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2486             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2487                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2488                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2491             if(IS_INTRA4x4(mb_type)){
2492                 if(simple || !s->encoding){
2493                     if(IS_8x8DCT(mb_type)){
2494                         if(transform_bypass){
2496                             idct_add = s->dsp.add_pixels8;
2498                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2499                             idct_add    = s->dsp.h264_idct8_add;
                        /* 8x8 intra prediction + residual, one 8x8 block per iteration */
2501                         for(i=0; i<16; i+=4){
2502                             uint8_t * const ptr= dest_y + block_offset[i];
2503                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2504                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2505                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2507                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2508                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2509                                                             (h->topright_samples_available<<i)&0x4000, linesize);
                                /* nnz==1 with only the DC coeff: use the cheap DC-only add */
2511                                     if(nnz == 1 && h->mb[i*16])
2512                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2514                                         idct_add   (ptr, h->mb + i*16, linesize);
                    /* 4x4 transform path (else branch of IS_8x8DCT) */
2519                     if(transform_bypass){
2521                         idct_add = s->dsp.add_pixels4;
2523                         idct_dc_add = s->dsp.h264_idct_dc_add;
2524                         idct_add    = s->dsp.h264_idct_add;
2526                     for(i=0; i<16; i++){
2527                         uint8_t * const ptr= dest_y + block_offset[i];
2528                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2530                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2531                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                            /* diagonal modes need the top-right samples; if unavailable,
                             * replicate the rightmost available top pixel */
2535                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2536                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2537                                 assert(mb_y || linesize <= block_offset[i]);
2538                                 if(!topright_avail){
2539                                     tr= ptr[3 - linesize]*0x01010101;
2540                                     topright= (uint8_t*) &tr;
2542                                     topright= ptr + 4 - linesize;
2546                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2547                             nnz = h->non_zero_count_cache[ scan8[i] ];
2550                                     if(nnz == 1 && h->mb[i*16])
2551                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2553                                         idct_add   (ptr, h->mb + i*16, linesize);
                                /* SVQ3 uses its own residual add (non-simple builds only) */
2555                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
             /* intra 16x16: full-MB prediction, then DC transform handling */
2562                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2564                     if(!transform_bypass)
2565                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2567                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2569             if(h->deblocking_filter)
2570                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
         /* inter macroblock: motion compensation (else branch of IS_INTRA) */
2572             hl_motion(h, dest_y, dest_cb, dest_cr,
2573                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2574                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2575                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     /* luma residual add for non-intra4x4 macroblocks */
2579         if(!IS_INTRA4x4(mb_type)){
2581                 if(IS_INTRA16x16(mb_type)){
2582                     if(transform_bypass){
2583                         if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2584                             h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2586                             for(i=0; i<16; i++){
2587                                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2588                                     s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2592                          s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
             /* inter residual: only when luma CBP bits are set */
2594                 }else if(h->cbp&15){
2595                     if(transform_bypass){
2596                         const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2597                         idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2598                         for(i=0; i<16; i+=di){
2599                             if(h->non_zero_count_cache[ scan8[i] ]){
2600                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2604                         if(IS_8x8DCT(mb_type)){
2605                             s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2607                             s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
             /* SVQ3 luma residual path (non-h264 builds) */
2612             for(i=0; i<16; i++){
2613                 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2614                     uint8_t * const ptr= dest_y + block_offset[i];
2615                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     /* chroma residual: only when chroma CBP bits (0x30) are set */
2621         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2622             uint8_t *dest[2] = {dest_cb, dest_cr};
2623             if(transform_bypass){
2624                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2625                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2626                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2628                     idct_add = s->dsp.add_pixels4;
2629                     for(i=16; i<16+8; i++){
2630                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2631                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                /* normal path: dequant chroma DC, then per-block IDCT add */
2635                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2636                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2638                     idct_add = s->dsp.h264_idct_add;
2639                     idct_dc_add = s->dsp.h264_idct_dc_add;
2640                     for(i=16; i<16+8; i++){
2641                         if(h->non_zero_count_cache[ scan8[i] ])
2642                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2643                         else if(h->mb[i*16])
2644                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                /* SVQ3 chroma residual path */
2647                 for(i=16; i<16+8; i++){
2648                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2649                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2650                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
     /* h->mb only needs clearing if anything was written into it */
2657     if(h->cbp || IS_INTRA(mb_type))
2658         s->dsp.clear_blocks(h->mb);
2660     if(h->deblocking_filter) {
2661         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2662         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2663         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2664         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2665         if (!simple && FRAME_MBAFF) {
2666             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2668             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2674  * Process a macroblock; this case avoids checks for expensive uncommon cases.
2676 static void hl_decode_mb_simple(H264Context *h){
     /* simple=1 lets the always-inline body fold away MBAFF/PCM/SVQ3 branches */
2677     hl_decode_mb_internal(h, 1);
2681  * Process a macroblock; this handles edge cases, such as interlacing.
2683 static void av_noinline hl_decode_mb_complex(H264Context *h){
     /* simple=0 keeps all rare-path handling; noinline avoids code bloat */
2684     hl_decode_mb_internal(h, 0);
/* Dispatch macroblock decoding to the simple or complex variant depending on
 * stream/build properties (CONFIG_SMALL builds always take the complex path
 * to avoid duplicating the inlined body). */
2687 static void hl_decode_mb(H264Context *h){
2688     MpegEncContext * const s = &h->s;
2689     const int mb_xy= h->mb_xy;
2690     const int mb_type= s->current_picture.mb_type[mb_xy];
2691     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2694         hl_decode_mb_complex(h);
2695     else hl_decode_mb_simple(h);
/* Convert an in-place Picture to represent a single field: for the bottom
 * field, advance each plane pointer by one line; double all linesizes so
 * stepping a "row" skips the other field; set reference/poc accordingly. */
2698 static void pic_as_field(Picture *pic, const int parity){
2700     for (i = 0; i < 4; ++i) {
2701         if (parity == PICT_BOTTOM_FIELD)
2702             pic->data[i] += pic->linesize[i];
2703         pic->reference = parity;
2704         pic->linesize[i] *= 2;
     /* poc takes the field_poc of the selected parity */
2706     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is a reference of the requested parity,
 * converting to field representation when parity is a single field.
 * Returns whether the copy happened (match). id_add adjusts pic_id for
 * field pairing. NOTE(review): the early-exit on !match is not visible
 * in this chunk. */
2709 static int split_field_copy(Picture *dest, Picture *src,
2710                             int parity, int id_add){
2711     int match = !!(src->reference & parity);
2715         if(parity != PICT_FRAME){
2716             pic_as_field(dest, parity);
2718             dest->pic_id += id_add;
/* Build a default reference list section by interleaving pictures of the
 * selected field parity (sel) with the opposite parity (sel^3), per the
 * H.264 field reference list construction rules. pic_id is the long-term
 * index (is_long) or frame_num. Returns the number of entries written. */
2725 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2729     while(i[0]<len || i[1]<len){
         /* advance each cursor to the next picture usable for its parity */
2730         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2732         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2735             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2736             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2739             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2740             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Append pictures from src to sorted[], ordered by POC relative to 'limit':
 * dir=0 selects descending POCs below limit, dir=1 ascending POCs above it
 * (used for the two halves of B-frame reference lists).
 * Returns the number of pictures appended. */
2747 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2752     best_poc= dir ? INT_MIN : INT_MAX;
2754     for(i=0; i<len; i++){
2755         const int poc= src[i]->poc;
         /* select the closest POC on the 'dir' side of limit not yet taken */
2756         if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2758             sorted[out_i]= src[i];
2761         if(best_poc == (dir ? INT_MIN : INT_MAX))
         /* tighten the limit so the next pass picks the next-closest POC */
2763         limit= sorted[out_i++]->poc - dir;
2769  * fills the default_ref_list.
2771 static int fill_default_ref_list(H264Context *h){
2772     MpegEncContext * const s = &h->s;
     /* B slices: short-term refs sorted by POC distance around the current POC,
      * then long-term refs appended; list1 uses the opposite POC ordering */
2775     if(h->slice_type_nos==FF_B_TYPE){
2776         Picture *sorted[32];
2781             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2783             cur_poc= s->current_picture_ptr->poc;
2785         for(list= 0; list<2; list++){
2786             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2787             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2789             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2790             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2793             if(len < h->ref_count[list])
2794                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
         /* spec: if both lists would be identical, swap the first two of list1 */
2798         if(lens[0] == lens[1] && lens[1] > 1){
2799             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2801                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
     /* P slices: short-term refs first (most recent first), then long-term */
2804         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2805         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2807         if(len < h->ref_count[0])
2808             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
     /* debug dumps of the constructed lists (tprintf is a no-op unless tracing) */
2811     for (i=0; i<h->ref_count[0]; i++) {
2812         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2814     if(h->slice_type_nos==FF_B_TYPE){
2815         for (i=0; i<h->ref_count[1]; i++) {
2816             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2823 static void print_short_term(H264Context *h);
2824 static void print_long_term(H264Context *h);
2827  * Extract structure information about the picture described by pic_num in
2828  * the current decoding context (frame or field). Note that pic_num is
2829  * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2830  * @param pic_num picture number for which to extract structure information
2831  * @param structure one of PICT_XXX describing structure of picture
2833  * @return frame number (short term) or long term index of picture
2834  *         described by pic_num
2836 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2837     MpegEncContext * const s = &h->s;
2839     *structure = s->picture_structure;
     /* field decoding (condition not visible here): an odd/even pic_num bit
      * selects same vs opposite field parity */
2842             /* opposite field */
2843             *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * starting from the default lists, move the picture selected by each
 * reordering_of_pic_nums_idc to the front of the remaining entries.
 * Returns 0 on success, -1 on bitstream errors. */
2850 static int decode_ref_pic_list_reordering(H264Context *h){
2851     MpegEncContext * const s = &h->s;
2852     int list, index, pic_structure;
2854     print_short_term(h);
2857     for(list=0; list<h->list_count; list++){
2858         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
         /* ref_pic_list_reordering_flag */
2860         if(get_bits1(&s->gb)){
2861             int pred= h->curr_pic_num;
2863             for(index=0; ; index++){
2864                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2865                 unsigned int pic_id;
2867                 Picture *ref = NULL;
                 /* idc==3 terminates the reordering list */
2869                 if(reordering_of_pic_nums_idc==3)
2872                 if(index >= h->ref_count[list]){
2873                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2877                 if(reordering_of_pic_nums_idc<3){
                     /* idc 0/1: short-term, pic num delta relative to pred */
2878                     if(reordering_of_pic_nums_idc<2){
2879                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2882                         if(abs_diff_pic_num > h->max_pic_num){
2883                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2887                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2888                         else                                pred+= abs_diff_pic_num;
                        /* modulo max_pic_num wrap (power of two) */
2889                         pred &= h->max_pic_num - 1;
2891                         frame_num = pic_num_extract(h, pred, &pic_structure);
                        /* search short-term list newest-to-oldest for frame_num/parity */
2893                         for(i= h->short_ref_count-1; i>=0; i--){
2894                             ref = h->short_ref[i];
2895                             assert(ref->reference);
2896                             assert(!ref->long_ref);
2898                                 ref->frame_num == frame_num &&
2899                                 (ref->reference & pic_structure)
                    /* idc 2: long-term reference selected by index */
2907                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2909                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2912                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2915                         ref = h->long_ref[long_idx];
2916                         assert(!(ref && !ref->reference));
2917                         if(ref && (ref->reference & pic_structure)){
2918                             ref->pic_id= pic_id;
2919                             assert(ref->long_ref);
2927                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2928                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                        /* shift trailing entries down and insert the found ref at index */
2930                         for(i=index; i+1<h->ref_count[list]; i++){
2931                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2934                         for(; i > index; i--){
2935                             h->ref_list[list][i]= h->ref_list[list][i-1];
2937                         h->ref_list[list][index]= *ref;
2939                             pic_as_field(&h->ref_list[list][index], pic_structure);
2943                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
     /* fill any holes so later stages never see a NULL-data reference */
2949     for(list=0; list<h->list_count; list++){
2950         for(index= 0; index < h->ref_count[list]; index++){
2951             if(!h->ref_list[list][index].data[0]){
2952                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2953                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF: derive per-field reference entries (at index 16+2*i and
 * 16+2*i+1) from each frame reference, duplicating weighted-prediction
 * parameters so field macroblocks can index them directly. */
2961 static void fill_mbaff_ref_list(H264Context *h){
2963     for(list=0; list<2; list++){ //FIXME try list_count
2964         for(i=0; i<h->ref_count[list]; i++){
2965             Picture *frame = &h->ref_list[list][i];
2966             Picture *field = &h->ref_list[list][16+2*i];
             /* field[0] = top field: doubled stride, top parity, top POC */
2969                 field[0].linesize[j] <<= 1;
2970             field[0].reference = PICT_TOP_FIELD;
2971             field[0].poc= field[0].field_poc[0];
             /* field[1] = bottom field: same but offset one line down */
2972             field[1] = field[0];
2974                 field[1].data[j] += frame->linesize[j];
2975             field[1].reference = PICT_BOTTOM_FIELD;
2976             field[1].poc= field[1].field_poc[1];
             /* replicate explicit weights/offsets for both field entries */
2978             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2979             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2981                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2982                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
     /* replicate implicit bipred weights across the field entries */
2986     for(j=0; j<h->ref_count[1]; j++){
2987         for(i=0; i<h->ref_count[0]; i++)
2988             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2989         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2990         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header: log2 weight denominators
 * and, per reference of each list, optional explicit luma/chroma weights and
 * offsets. References without an explicit weight get the default (denominator)
 * weight and zero offset. Sets use_weight/use_weight_chroma flags. */
2994 static int pred_weight_table(H264Context *h){
2995     MpegEncContext * const s = &h->s;
2997     int luma_def, chroma_def;
3000     h->use_weight_chroma= 0;
3001     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3002     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
     /* default weight = 1.0 expressed in the fixed-point denominator */
3003     luma_def = 1<<h->luma_log2_weight_denom;
3004     chroma_def = 1<<h->chroma_log2_weight_denom;
3006     for(list=0; list<2; list++){
3007         h->luma_weight_flag[list]   = 0;
3008         h->chroma_weight_flag[list] = 0;
3009         for(i=0; i<h->ref_count[list]; i++){
3010             int luma_weight_flag, chroma_weight_flag;
3012             luma_weight_flag= get_bits1(&s->gb);
3013             if(luma_weight_flag){
3014                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3015                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
                 /* only non-trivial weights activate weighted prediction */
3016                 if(   h->luma_weight[list][i] != luma_def
3017                    || h->luma_offset[list][i] != 0) {
3019                     h->luma_weight_flag[list]= 1;
3022                 h->luma_weight[list][i]= luma_def;
3023                 h->luma_offset[list][i]= 0;
3027                 chroma_weight_flag= get_bits1(&s->gb);
3028                 if(chroma_weight_flag){
                    /* per-component (Cb/Cr) weight+offset; loop header not visible */
3031                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3032                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3033                         if(   h->chroma_weight[list][i][j] != chroma_def
3034                            || h->chroma_offset[list][i][j] != 0) {
3035                             h->use_weight_chroma= 1;
3036                             h->chroma_weight_flag[list]= 1;
3042                         h->chroma_weight[list][i][j]= chroma_def;
3043                         h->chroma_offset[list][i][j]= 0;
         /* list1 weights only exist for B slices */
3048         if(h->slice_type_nos != FF_B_TYPE) break;
3050     h->use_weight= h->use_weight || h->use_weight_chroma;
/* Computes implicit (POC-distance based) bi-prediction weights for B slices
 * (weighted_bipred_idc == 2). For each (ref0, ref1) pair the weight is
 * derived from td/tb clipped to [-128,127] and a dist_scale_factor; values
 * outside [-64,128] fall back to the equal weight 32 (out of 64).
 * NOTE(review): elided listing — the early-return path and some conditions
 * (e.g. the td==0/long_ref guard usually present here) are hidden in gaps. */
3054 static void implicit_weight_table(H264Context *h){
3055 MpegEncContext * const s = &h->s;
3057 int cur_poc = s->current_picture_ptr->poc;
3059 for (i = 0; i < 2; i++) {
3060 h->luma_weight_flag[i] = 0;
3061 h->chroma_weight_flag[i] = 0;
/* single-ref symmetric case: both refs equidistant from cur_poc -> no weighting */
3064 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3065 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3067 h->use_weight_chroma= 0;
3072 h->use_weight_chroma= 2;
/* implicit mode always uses a fixed denominator of 2^5 = 64 */
3073 h->luma_log2_weight_denom= 5;
3074 h->chroma_log2_weight_denom= 5;
3076 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3077 int poc0 = h->ref_list[0][ref0].poc;
3078 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3079 int poc1 = h->ref_list[1][ref1].poc;
3080 int td = av_clip(poc1 - poc0, -128, 127);
3082 int tb = av_clip(cur_poc - poc0, -128, 127);
3083 int tx = (16384 + (FFABS(td) >> 1)) / td;
3084 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factor -> default to equal weighting */
3085 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3086 h->implicit_weight[ref0][ref1] = 32;
3088 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3090 h->implicit_weight[ref0][ref1] = 32;
3096  * Mark a picture as no longer needed for reference. The refmask
3097  * argument allows unreferencing of individual fields or the whole frame.
3098  * If the picture becomes entirely unreferenced, but is being held for
3099  * display purposes, it is marked as such.
3100  * @param refmask mask of fields to unreference; the mask is bitwise
3101  * anded with the reference marking of pic
3102  * @return non-zero if pic becomes entirely unreferenced (except possibly
3103  * for display purposes) zero if one of the fields remains in
/* NOTE(review): elided listing — the zero-return path and the
 * pic->reference = 0 / return 1 tail are hidden in the line-number gaps. */
3106 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* masking keeps only the fields that remain referenced; non-zero => still a ref */
3108 if (pic->reference &= refmask) {
/* if the picture is still queued for output, keep it alive as DELAYED_PIC_REF */
3111 for(i = 0; h->delayed_pic[i]; i++)
3112 if(pic == h->delayed_pic[i]){
3113 pic->reference=DELAYED_PIC_REF;
3121  * instantaneous decoder refresh.
/* IDR handling: drop every long-term and short-term reference picture and
 * reset frame_num bookkeeping, as required at an IDR access unit. */
3123 static void idr(H264Context *h){
3126 for(i=0; i<16; i++){
3127 remove_long(h, i, 0);
3129 assert(h->long_ref_count==0);
/* short-term refs are unreferenced directly rather than via remove_short() */
3131 for(i=0; i<h->short_ref_count; i++){
3132 unreference_pic(h, h->short_ref[i], 0);
3133 h->short_ref[i]= NULL;
3135 h->short_ref_count=0;
3136 h->prev_frame_num= 0;
3137 h->prev_frame_num_offset= 0;
3142 /* forget old pics after a seek */
/* AVCodec.flush callback: clears the delayed-output queue, drops the
 * reference marking on any in-progress picture, resets field state, then
 * delegates buffer cleanup to ff_mpeg_flush(). */
3143 static void flush_dpb(AVCodecContext *avctx){
3144 H264Context *h= avctx->priv_data;
3146 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3147 if(h->delayed_pic[i])
3148 h->delayed_pic[i]->reference= 0;
3149 h->delayed_pic[i]= NULL;
/* INT_MIN marks "no picture output yet" for the POC-ordered output logic */
3151 h->outputed_poc= INT_MIN;
3153 if(h->s.current_picture_ptr)
3154 h->s.current_picture_ptr->reference= 0;
3155 h->s.first_field= 0;
3157 ff_mpeg_flush(avctx);
3161  * Find a Picture in the short term reference list by frame number.
3162  * @param frame_num frame number to search for
3163  * @param idx the index into h->short_ref where returned picture is found
3164  * undefined if no picture found.
3165  * @return pointer to the found picture, or NULL if no pic with the provided
3166  * frame number is found
/* NOTE(review): elided listing — the match body (storing *idx, returning pic)
 * and the final NULL return are hidden in the line-number gaps. */
3168 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3169 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is small (<= DPB size) */
3172 for(i=0; i<h->short_ref_count; i++){
3173 Picture *pic= h->short_ref[i];
3174 if(s->avctx->debug&FF_DEBUG_MMCO)
3175 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3176 if(pic->frame_num == frame_num) {
3185  * Remove a picture from the short term reference list by its index in
3186  * that list. This does no checking on the provided index; it is assumed
3187  * to be valid. Other list entries are shifted down.
3188  * @param i index into h->short_ref of picture to remove.
3190 static void remove_short_at_index(H264Context *h, int i){
3191 assert(i >= 0 && i < h->short_ref_count);
3192 h->short_ref[i]= NULL;
/* compact the list: shift the remaining tail entries down by one */
3193 if (--h->short_ref_count)
3194 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3199  * @return the removed picture or NULL if an error occurs
/* Looks up a short-term reference by frame_num, unreferences the fields in
 * ref_mask, and removes it from short_ref when it becomes fully unreferenced.
 * NOTE(review): elided listing — the "if(pic)" guard and the return statement
 * are hidden in the line-number gaps. */
3201 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3202 MpegEncContext * const s = &h->s;
3206 if(s->avctx->debug&FF_DEBUG_MMCO)
3207 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3209 pic = find_short(h, frame_num, &i);
/* only drop the list slot once no field of the picture remains referenced */
3211 if(unreference_pic(h, pic, ref_mask))
3212 remove_short_at_index(h, i);
3219  * Remove a picture from the long term reference list by its index in
3221  * @return the removed picture or NULL if an error occurs
/* Unreferences the long-term picture at slot i (fields selected by ref_mask);
 * when it becomes fully unreferenced, clears its long_ref flag and frees the
 * slot. NOTE(review): the return statement is hidden in the elided gap. */
3223 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3226 pic= h->long_ref[i];
3228 if(unreference_pic(h, pic, ref_mask)){
3229 assert(h->long_ref[i]->long_ref == 1);
3230 h->long_ref[i]->long_ref= 0;
3231 h->long_ref[i]= NULL;
3232 h->long_ref_count--;
3240  * print short term list
/* Debug helper: dumps index, frame_num, poc and data pointer of every
 * short-term reference when FF_DEBUG_MMCO is enabled; no-op otherwise. */
3242 static void print_short_term(H264Context *h) {
3244 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3245 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3246 for(i=0; i<h->short_ref_count; i++){
3247 Picture *pic= h->short_ref[i];
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3254  * print long term list
/* Debug helper: dumps every occupied long-term slot (all 16 slots scanned;
 * the NULL check on pic sits in an elided line) when FF_DEBUG_MMCO is set. */
3256 static void print_long_term(H264Context *h) {
3258 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3260 for(i = 0; i < 16; i++){
3261 Picture *pic= h->long_ref[i];
3263 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3270  * Executes the reference picture marking (memory management control operations).
/* Applies the decoded MMCO list (H.264 spec 8.2.5): moving/removing pictures
 * between the short-term and long-term reference lists, handling IDR-style
 * resets, marking the current picture, and finally enforcing the DPB size
 * limit (sps.ref_frame_count). With mmco_count==0 the sliding-window default
 * applies (handled by the caller building an implicit MMCO, per
 * decode_ref_pic_marking below).
 * NOTE(review): elided listing — several case labels, breaks and closing
 * braces fall in the hidden line-number gaps. */
3272 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3273 MpegEncContext * const s = &h->s;
3274 int i, av_uninit(j);
3275 int current_ref_assigned=0;
3276 Picture *av_uninit(pic);
3278 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3279 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3281 for(i=0; i<mmco_count; i++){
3282 int av_uninit(structure), av_uninit(frame_num);
3283 if(s->avctx->debug&FF_DEBUG_MMCO)
3284 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* opcodes operating on a short-term picture first resolve it by frame_num */
3286 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3287 || mmco[i].opcode == MMCO_SHORT2LONG){
3288 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3289 pic = find_short(h, frame_num, &j);
/* a failed lookup is only an error if the target isn't already long-term */
3291 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3292 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3293 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3298 switch(mmco[i].opcode){
3299 case MMCO_SHORT2UNUSED:
3300 if(s->avctx->debug&FF_DEBUG_MMCO)
3301 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME: unreference the complementary field(s) */
3302 remove_short(h, frame_num, structure ^ PICT_FRAME);
3304 case MMCO_SHORT2LONG:
/* evict whatever currently occupies the target long-term slot */
3305 if (h->long_ref[mmco[i].long_arg] != pic)
3306 remove_long(h, mmco[i].long_arg, 0);
3308 remove_short_at_index(h, j);
3309 h->long_ref[ mmco[i].long_arg ]= pic;
3310 if (h->long_ref[ mmco[i].long_arg ]){
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 case MMCO_LONG2UNUSED:
3316 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3317 pic = h->long_ref[j];
3319 remove_long(h, j, structure ^ PICT_FRAME);
3320 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3321 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3324 // Comment below left from previous code as it is an interesting note.
3325 /* First field in pair is in short term list or
3326  * at a different long term index.
3327  * This is not allowed; see 7.4.3.3, notes 2 and 3.
3328  * Report the problem and keep the pair where it is,
3329  * and mark this field valid.
/* MMCO_LONG: assign the *current* picture to a long-term slot */
3332 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3333 remove_long(h, mmco[i].long_arg, 0);
3335 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3336 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3337 h->long_ref_count++;
3340 s->current_picture_ptr->reference |= s->picture_structure;
3341 current_ref_assigned=1;
3343 case MMCO_SET_MAX_LONG:
3344 assert(mmco[i].long_arg <= 16);
3345 // just remove the long term which index is greater than new max
3346 for(j = mmco[i].long_arg; j<16; j++){
3347 remove_long(h, j, 0);
/* MMCO_RESET (IDR-like): flush every reference and reset POC/frame_num */
3351 while(h->short_ref_count){
3352 remove_short(h, h->short_ref[0]->frame_num, 0);
3354 for(j = 0; j < 16; j++) {
3355 remove_long(h, j, 0);
3357 s->current_picture_ptr->poc=
3358 s->current_picture_ptr->field_poc[0]=
3359 s->current_picture_ptr->field_poc[1]=
3363 s->current_picture_ptr->frame_num= 0;
3369 if (!current_ref_assigned) {
3370 /* Second field of complementary field pair; the first field of
3371  * which is already referenced. If short referenced, it
3372  * should be first entry in short_ref. If not, it must exist
3373  * in long_ref; trying to put it on the short list here is an
3374  * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3376 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3377 /* Just mark the second field valid */
3378 s->current_picture_ptr->reference = PICT_FRAME;
3379 } else if (s->current_picture_ptr->long_ref) {
3380 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3381 "assignment for second field "
3382 "in complementary field pair "
3383 "(first field is long term)\n");
/* default: insert the current picture at the head of the short-term list */
3385 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3387 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3390 if(h->short_ref_count)
3391 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3393 h->short_ref[0]= s->current_picture_ptr;
3394 h->short_ref_count++;
3395 s->current_picture_ptr->reference |= s->picture_structure;
/* enforce the DPB capacity announced in the SPS */
3399 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3401 /* We have too many reference frames, probably due to corrupted
3402  * stream. Need to discard one frame. Prevents overrun of the
3403  * short_ref and long_ref buffers.
3405 av_log(h->s.avctx, AV_LOG_ERROR,
3406 "number of reference frames exceeds max (probably "
3407 "corrupt input), discarding one\n");
3409 if (h->long_ref_count && !h->short_ref_count) {
3410 for (i = 0; i < 16; ++i)
3415 remove_long(h, i, 0);
/* otherwise drop the oldest short-term reference */
3417 pic = h->short_ref[h->short_ref_count - 1];
3418 remove_short(h, pic->frame_num, 0);
3422 print_short_term(h);
/* Parses dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices get a synthetic MMCO_LONG when long_term_reference_flag is set;
 * otherwise the explicit MMCO list is read when
 * adaptive_ref_pic_marking_mode_flag is 1, and in sliding-window mode an
 * implicit MMCO_SHORT2UNUSED is synthesized for the oldest short-term ref
 * once the DPB is full. NOTE(review): elided listing — the
 * mmco_index assignments and IDR branch details sit in hidden gaps. */
3427 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3428 MpegEncContext * const s = &h->s;
3432 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: get_bits1()-1 yields 0 or -1 (broken_link) */
3433 s->broken_link= get_bits1(gb) -1;
3435 h->mmco[0].opcode= MMCO_LONG;
3436 h->mmco[0].long_arg= 0;
3440 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3441 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3442 MMCOOpcode opcode= get_ue_golomb_31(gb);
3444 h->mmco[i].opcode= opcode;
3445 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num */
3446 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3447 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3448  av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3452 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3453 unsigned int long_arg= get_ue_golomb_31(gb);
/* 16..31 is only valid as a field long_term_pic_num in field pictures */
3454 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3455 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3458 h->mmco[i].long_arg= long_arg;
3461 if(opcode > (unsigned)MMCO_LONG){
3462 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3465 if(opcode == MMCO_END)
/* sliding-window mode: build the implicit "drop oldest" MMCO ourselves */
3470 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3472 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3473 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3474 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3475 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3477 if (FIELD_PICTURE) {
/* field pictures address refs by pic_num = 2*frame_num (+1 for same parity) */
3478 h->mmco[0].short_pic_num *= 2;
3479 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3480 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Computes the picture order count (POC) for the current picture per
 * H.264 spec 8.2.1, covering all three poc_type modes:
 *   type 0 — poc_lsb/poc_msb wraparound arithmetic,
 *   type 1 — expected POC from the SPS ref-frame offset cycle,
 *   type 2 — POC derived directly from frame_num.
 * Results land in field_poc[0/1] and cur->poc (min of the two fields).
 * NOTE(review): elided listing — the field_poc[0] assignment in the type-0
 * branch and parts of type-1/2 are hidden in line-number gaps. */
3490 static int init_poc(H264Context *h){
3491 MpegEncContext * const s = &h->s;
3492 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3494 Picture *cur = s->current_picture_ptr;
3496 h->frame_num_offset= h->prev_frame_num_offset;
/* frame_num wrapped -> advance the offset by one full frame_num cycle */
3497 if(h->frame_num < h->prev_frame_num)
3498 h->frame_num_offset += max_frame_num;
3500 if(h->sps.poc_type==0){
3501 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect lsb wraparound in either direction and adjust msb accordingly */
3503 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3504 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3505 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3506 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3508 h->poc_msb = h->prev_poc_msb;
3509 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3511 field_poc[1] = h->poc_msb + h->poc_lsb;
3512 if(s->picture_structure == PICT_FRAME)
3513 field_poc[1] += h->delta_poc_bottom;
3514 }else if(h->sps.poc_type==1){
3515 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3518 if(h->sps.poc_cycle_length != 0)
3519 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures are excluded from the cycle count */
3523 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3526 expected_delta_per_poc_cycle = 0;
3527 for(i=0; i < h->sps.poc_cycle_length; i++)
3528 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3530 if(abs_frame_num > 0){
3531 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3532 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3534 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3535 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3536 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3540 if(h->nal_ref_idc == 0)
3541 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3543 field_poc[0] = expectedpoc + h->delta_poc[0];
3544 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3546 if(s->picture_structure == PICT_FRAME)
3547 field_poc[1] += h->delta_poc[1];
/* poc_type == 2: POC follows decoding order directly */
3549 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC for fields actually present in this picture */
3558 if(s->picture_structure != PICT_BOTTOM_FIELD)
3559 s->current_picture_ptr->field_poc[0]= field_poc[0];
3560 if(s->picture_structure != PICT_TOP_FIELD)
3561 s->current_picture_ptr->field_poc[1]= field_poc[1];
3562 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3569  * initialize scan tables
/* Builds the per-context zigzag/field scan tables. When the DSP context uses
 * the plain C IDCT, the canonical scan orders are copied verbatim; optimized
 * IDCTs use a permuted coefficient layout, so the scans are remapped through
 * T(). The *_q0 pointers always reference the unpermuted tables, used for
 * lossless (transform_bypass) blocks at qp 0. */
3571 static void init_scan_tables(H264Context *h){
3572 MpegEncContext * const s = &h->s;
3574 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3575 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3576 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3578 for(i=0; i<16; i++){
/* 4x4 permutation: swap the two 2-bit halves of the index */
3579 #define T(x) (x>>2) | ((x<<2) & 0xF)
3580 h->zigzag_scan[i] = T(zigzag_scan[i]);
3581 h-> field_scan[i] = T( field_scan[i]);
3585 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3586 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3587 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3588 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3589 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3591 for(i=0; i<64; i++){
/* 8x8 permutation: swap row and column (transpose the index) */
3592 #define T(x) (x>>3) | ((x&7)<<3)
3593 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3594 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3595 h->field_scan8x8[i] = T(field_scan8x8[i]);
3596 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3600 if(h->sps.transform_bypass){ //FIXME same ugly
3601 h->zigzag_scan_q0 = zigzag_scan;
3602 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3603 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3604 h->field_scan_q0 = field_scan;
3605 h->field_scan8x8_q0 = field_scan8x8;
3606 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3608 h->zigzag_scan_q0 = h->zigzag_scan;
3609 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3610 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3611 h->field_scan_q0 = h->field_scan;
3612 h->field_scan8x8_q0 = h->field_scan8x8;
3613 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/* Finalizes the current field/frame: applies reference-picture marking,
 * rolls the POC/frame_num state forward for the next picture, and notifies
 * hwaccel / VDPAU backends that the picture is complete.
 * NOTE(review): elided listing — the reference-marking call is likely
 * conditional on !s->dropable in the hidden lines; confirm before editing. */
3617 static void field_end(H264Context *h){
3618 MpegEncContext * const s = &h->s;
3619 AVCodecContext * const avctx= s->avctx;
3622 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3623 s->current_picture_ptr->pict_type= s->pict_type;
3625 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3626 ff_vdpau_h264_set_reference_frames(s);
3629 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* carry POC/frame_num state into the next picture's init_poc() */
3630 h->prev_poc_msb= h->poc_msb;
3631 h->prev_poc_lsb= h->poc_lsb;
3633 h->prev_frame_num_offset= h->frame_num_offset;
3634 h->prev_frame_num= h->frame_num;
3636 if (avctx->hwaccel) {
3637 if (avctx->hwaccel->end_frame(avctx) < 0)
3638 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3641 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3642 ff_vdpau_h264_picture_complete(s);
3645  * FIXME: Error handling code does not seem to support interlaced
3646  * when slices span multiple rows
3647  * The ff_er_add_slice calls don't work right for bottom
3648  * fields; they cause massive erroneous error concealing
3649  * Error marking covers both fields (top and bottom).
3650  * This causes a mismatched s->error_count
3651  * and a bad error table. Further, the error count goes to
3652  * INT_MAX when called for bottom field, because mb_y is
3653  * past end by one (callers fault) and resync_mb_y != 0
3654  * causes problems for the first MB line, too.
3663  * Replicates H264 "master" context to thread contexts.
/* Copies the per-picture decoding state (current picture, linesizes, POC
 * bookkeeping, reference lists and dequant tables) from the master context
 * into a worker-thread context so slices can be decoded in parallel.
 * Pointer members are shared, not deep-copied — the thread contexts must
 * not outlive or mutate the master's picture storage. */
3665 static void clone_slice(H264Context *dst, H264Context *src)
3667 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3668 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3669 dst->s.current_picture = src->s.current_picture;
3670 dst->s.linesize = src->s.linesize;
3671 dst->s.uvlinesize = src->s.uvlinesize;
3672 dst->s.first_field = src->s.first_field;
3674 dst->prev_poc_msb = src->prev_poc_msb;
3675 dst->prev_poc_lsb = src->prev_poc_lsb;
3676 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3677 dst->prev_frame_num = src->prev_frame_num;
3678 dst->short_ref_count = src->short_ref_count;
3680 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3681 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3682 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3683 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3685 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3686 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3690  * decodes a slice header.
3691  * This will also call MPV_common_init() and frame_start() as needed.
3693  * @param h h264context
3694  * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3696  * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): elided listing — many error-return lines, braces and #if
 * blocks are hidden in the embedded line-number gaps; comments below only
 * describe what the visible statements establish. */
3698 static int decode_slice_header(H264Context *h, H264Context *h0){
3699 MpegEncContext * const s = &h->s;
3700 MpegEncContext * const s0 = &h0->s;
3701 unsigned int first_mb_in_slice;
3702 unsigned int pps_id;
3703 int num_ref_idx_active_override_flag;
3704 unsigned int slice_type, tmp, i, j;
3705 int default_ref_list_done = 0;
3706 int last_pic_structure;
/* a picture with nal_ref_idc==0 is never referenced and may be dropped */
3708 s->dropable= h->nal_ref_idc == 0;
3710 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3711 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3712 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3714 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3715 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3718 first_mb_in_slice= get_ue_golomb(&s->gb);
3720 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3721 h0->current_slice = 0;
3722 if (!s0->first_field)
3723 s->current_picture_ptr= NULL;
3726 slice_type= get_ue_golomb_31(&s->gb);
3728 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "same type for the whole picture" (fixed) */
3733 h->slice_type_fixed=1;
3735 h->slice_type_fixed=0;
3737 slice_type= golomb_to_pict_type[ slice_type ];
3738 if (slice_type == FF_I_TYPE
3739 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3740 default_ref_list_done = 1;
3742 h->slice_type= slice_type;
/* slice_type_nos strips the SI/SP distinction (keeps I/P/B) */
3743 h->slice_type_nos= slice_type & 3;
3745 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3746 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3747 av_log(h->s.avctx, AV_LOG_ERROR,
3748 "B picture before any references, skipping\n");
/* resolve the active PPS, then the SPS it refers to */
3752 pps_id= get_ue_golomb(&s->gb);
3753 if(pps_id>=MAX_PPS_COUNT){
3754 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3757 if(!h0->pps_buffers[pps_id]) {
3758 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3761 h->pps= *h0->pps_buffers[pps_id];
3763 if(!h0->sps_buffers[h->pps.sps_id]) {
3764 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3767 h->sps = *h0->sps_buffers[h->pps.sps_id];
3769 if(h == h0 && h->dequant_coeff_pps != pps_id){
3770 h->dequant_coeff_pps = pps_id;
3771 init_dequant_tables(h);
/* derive picture geometry from the SPS (mb units; doubled height for fields) */
3774 s->mb_width= h->sps.mb_width;
3775 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3777 h->b_stride= s->mb_width*4;
3778 h->b8_stride= s->mb_width*2;
3780 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3781 if(h->sps.frame_mbs_only_flag)
3782 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3784 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3786 if (s->context_initialized
3787 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3789 return -1; // width / height changed during parallelized decoding
3791 flush_dpb(s->avctx);
3794 if (!s->context_initialized) {
3796 return -1; // we cant (re-)initialize context during parallel decoding
3797 if (MPV_common_init(s) < 0)
3801 init_scan_tables(h);
/* set up per-thread slice-decoding contexts (slice-parallel mode) */
3804 for(i = 1; i < s->avctx->thread_count; i++) {
3806 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3807 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3808 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3811 init_scan_tables(c);
3815 for(i = 0; i < s->avctx->thread_count; i++)
3816 if(context_init(h->thread_context[i]) < 0)
3819 s->avctx->width = s->width;
3820 s->avctx->height = s->height;
3821 s->avctx->sample_aspect_ratio= h->sps.sar;
3822 if(!s->avctx->sample_aspect_ratio.den)
3823 s->avctx->sample_aspect_ratio.den = 1;
3825 if(h->sps.timing_info_present_flag){
3826 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
/* workaround: old x264 builds (<44) wrote half the correct time_scale */
3827 if(h->x264_build > 0 && h->x264_build < 44)
3828 s->avctx->time_base.den *= 2;
3829 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3830 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3834 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3837 h->mb_aff_frame = 0;
3838 last_pic_structure = s0->picture_structure;
3839 if(h->sps.frame_mbs_only_flag){
3840 s->picture_structure= PICT_FRAME;
3842 if(get_bits1(&s->gb)) { //field_pic_flag
3843 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3845 s->picture_structure= PICT_FRAME;
3846 h->mb_aff_frame = h->sps.mb_aff;
3849 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3851 if(h0->current_slice == 0){
/* conceal gaps in frame_num by synthesizing the missing frames */
3852 while(h->frame_num != h->prev_frame_num &&
3853 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3854 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3855 if (frame_start(h) < 0)
3857 h->prev_frame_num++;
3858 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3859 s->current_picture_ptr->frame_num= h->prev_frame_num;
3860 execute_ref_pic_marking(h, NULL, 0);
3863 /* See if we have a decoded first field looking for a pair... */
3864 if (s0->first_field) {
3865 assert(s0->current_picture_ptr);
3866 assert(s0->current_picture_ptr->data[0]);
3867 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3869 /* figure out if we have a complementary field pair */
3870 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3872  * Previous field is unmatched. Don't display it, but let it
3873  * remain for reference if marked as such.
3875 s0->current_picture_ptr = NULL;
3876 s0->first_field = FIELD_PICTURE;
3879 if (h->nal_ref_idc &&
3880 s0->current_picture_ptr->reference &&
3881 s0->current_picture_ptr->frame_num != h->frame_num) {
3883  * This and previous field were reference, but had
3884  * different frame_nums. Consider this field first in
3885  * pair. Throw away previous field except for reference
3888 s0->first_field = 1;
3889 s0->current_picture_ptr = NULL;
3892 /* Second field in complementary pair */
3893 s0->first_field = 0;
3898 /* Frame or first field in a potentially complementary pair */
3899 assert(!s0->current_picture_ptr);
3900 s0->first_field = FIELD_PICTURE;
3903 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3904 s0->first_field = 0;
3911 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3913 assert(s->mb_num == s->mb_width * s->mb_height);
3914 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3915 first_mb_in_slice >= s->mb_num){
3916 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3919 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3920 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3921 if (s->picture_structure == PICT_BOTTOM_FIELD)
3922 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3923 assert(s->mb_y < s->mb_height);
/* pic_num space is twice as large for field pictures */
3925 if(s->picture_structure==PICT_FRAME){
3926 h->curr_pic_num= h->frame_num;
3927 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3929 h->curr_pic_num= 2*h->frame_num + 1;
3930 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3933 if(h->nal_unit_type == NAL_IDR_SLICE){
3934 get_ue_golomb(&s->gb); /* idr_pic_id */
3937 if(h->sps.poc_type==0){
3938 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3940 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3941 h->delta_poc_bottom= get_se_golomb(&s->gb);
3945 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3946 h->delta_poc[0]= get_se_golomb(&s->gb);
3948 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3949 h->delta_poc[1]= get_se_golomb(&s->gb);
3954 if(h->pps.redundant_pic_cnt_present){
3955 h->redundant_pic_count= get_ue_golomb(&s->gb);
3958 //set defaults, might be overridden a few lines later
3959 h->ref_count[0]= h->pps.ref_count[0];
3960 h->ref_count[1]= h->pps.ref_count[1];
3962 if(h->slice_type_nos != FF_I_TYPE){
3963 if(h->slice_type_nos == FF_B_TYPE){
3964 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3966 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3968 if(num_ref_idx_active_override_flag){
3969 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3970 if(h->slice_type_nos==FF_B_TYPE)
3971 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned wrap trick: ref_count of 0 also fails this range check */
3973 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3974 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3975 h->ref_count[0]= h->ref_count[1]= 1;
3979 if(h->slice_type_nos == FF_B_TYPE)
3986 if(!default_ref_list_done){
3987 fill_default_ref_list(h);
3990 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
/* legacy mpegvideo last/next picture pointers mirror ref_list[0]/[1] heads */
3993 if(h->slice_type_nos!=FF_I_TYPE){
3994 s->last_picture_ptr= &h->ref_list[0][0];
3995 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3997 if(h->slice_type_nos==FF_B_TYPE){
3998 s->next_picture_ptr= &h->ref_list[1][0];
3999 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
4002 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4003 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4004 pred_weight_table(h);
4005 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4006 implicit_weight_table(h);
4009 for (i = 0; i < 2; i++) {
4010 h->luma_weight_flag[i] = 0;
4011 h->chroma_weight_flag[i] = 0;
/* ref pic marking is parsed into the master context (h0) */
4016 decode_ref_pic_marking(h0, &s->gb);
4019 fill_mbaff_ref_list(h);
4021 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4022 direct_dist_scale_factor(h);
4023 direct_ref_list_init(h);
4025 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4026 tmp = get_ue_golomb_31(&s->gb);
4028 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4031 h->cabac_init_idc= tmp;
4034 h->last_qscale_diff = 0;
4035 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4037 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4041 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4042 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4043 //FIXME qscale / qp ... stuff
4044 if(h->slice_type == FF_SP_TYPE){
4045 get_bits1(&s->gb); /* sp_for_switch_flag */
4047 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4048 get_se_golomb(&s->gb); /* slice_qs_delta */
4051 h->deblocking_filter = 1;
4052 h->slice_alpha_c0_offset = 0;
4053 h->slice_beta_offset = 0;
4054 if( h->pps.deblocking_filter_parameters_present ) {
4055 tmp= get_ue_golomb_31(&s->gb);
4057 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4060 h->deblocking_filter= tmp;
/* bitstream idc 0 means "filter on", 1 means "off" -> flip 0<->1 internally */
4061 if(h->deblocking_filter < 2)
4062 h->deblocking_filter^= 1; // 1<->0
4064 if( h->deblocking_filter ) {
4065 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4066 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4070 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4071 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4072 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4073 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4074 h->deblocking_filter= 0;
4076 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4077 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4078 /* Cheat slightly for speed:
4079  Do not bother to deblock across slices. */
4080 h->deblocking_filter = 2;
4082 h0->max_contexts = 1;
4083 if(!h0->single_decode_warning) {
4084 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4085 h0->single_decode_warning = 1;
4088 return 1; // deblocking switched inside frame
/* NOTE(review): the '?' bit-length below is inside an #if 0 block in the
 * original file (unimplemented FMO slice-group syntax) — hidden by elision */
4093 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4094 slice_group_change_cycle= get_bits(&s->gb, ?);
4097 h0->last_slice_type = slice_type;
4098 h->slice_num = ++h0->current_slice;
4099 if(h->slice_num >= MAX_SLICES){
4100 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* build the ref->frame lookup: 4*frame_num + (reference&3) per list entry */
4104 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4108 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4109 +(h->ref_list[j][i].reference&3);
4112 for(i=16; i<48; i++)
4113 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4114 +(h->ref_list[j][i].reference&3);
4117 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4118 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4120 s->avctx->refs= h->sps.ref_frame_count;
4122 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4123 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4125 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4127 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4128 pps_id, h->frame_num,
4129 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4130 h->ref_count[0], h->ref_count[1],
4132 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4134 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4135 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4145 static inline int get_level_prefix(GetBitContext *gb){
/* Reads the unary level_prefix of a CAVLC residual level: the count of
 * leading zero bits before the first 1 bit in the bitstream.
 * NOTE(review): the source-line numbering jumps (4145->4149, 4151->4153,
 * 4160->end) indicate the extraction dropped lines here (local
 * declarations of buf/log and the final return) -- verify against the
 * original file before editing. */
4149 OPEN_READER(re, gb);
4150 UPDATE_CACHE(re, gb);
4151 buf=GET_CACHE(re, gb);
/* av_log2() returns the index of the highest set bit, so `log` counts
 * the bits up to and including the first 1 of the cached word. */
4153 log= 32 - av_log2(buf);
4155 print_bin(buf>>(32-log), log);
4156 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating 1 bit */
4159 LAST_SKIP_BITS(re, gb, log);
4160 CLOSE_READER(re, gb);
/* Returns nonzero when the 8x8 DCT may be used for the current MB:
 * no forbidden sub-partition flag is set in any of the sub_mb_type
 * entries (presumably checked four 16-bit entries at once through the
 * 0x0001000100010001 lane-replication mask -- TODO confirm layout of
 * h->sub_mb_type). With direct_8x8_inference_flag, DIRECT sub-blocks
 * are additionally tolerated (MB_TYPE_DIRECT2 left out of the mask). */
4165 static inline int get_dct8x8_allowed(H264Context *h){
4166 if(h->sps.direct_8x8_inference_flag)
4167 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
/* NOTE(review): numbering gap 4167->4169 -- the `else` line appears to
 * have been dropped by extraction; verify against the original. */
4169 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4173 * decodes a residual block.
4174 * @param n block index
4175 * @param scantable coefficient scan order (zigzag or field) used to map coeff_num to a position in block[]
4176 * @param max_coeff number of coefficients in the block
4177 * @return <0 if an error occurred
/* NOTE(review): throughout this function the embedded line numbering is
 * discontinuous (4181->4183, 4189->4191, 4199->4203, ...), so else/brace
 * lines and some statements are missing from this extraction; all added
 * comments below describe only what the visible lines show. */
4179 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4180 MpegEncContext * const s = &h->s;
4181 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4183 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4185 //FIXME put trailing_onex into the context
/* --- coeff_token: selects VLC table by block type / predicted nnz --- */
4187 if(n == CHROMA_DC_BLOCK_INDEX){
4188 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4189 total_coeff= coeff_token>>2;
4191 if(n == LUMA_DC_BLOCK_INDEX){
4192 total_coeff= pred_non_zero_count(h, 0);
4193 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4194 total_coeff= coeff_token>>2;
4196 total_coeff= pred_non_zero_count(h, n);
4197 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4198 total_coeff= coeff_token>>2;
4199 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4203 //FIXME set last_non_zero?
4207 if(total_coeff > (unsigned)max_coeff) {
4208 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token carry the trailing-ones count */
4212 trailing_ones= coeff_token&3;
4213 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4214 assert(total_coeff<=16);
/* --- trailing ones: one sign bit each, decoded branchlessly from a
 *     3-bit peek (values become +1 or -1) --- */
4216 i = show_bits(gb, 3);
4217 skip_bits(gb, trailing_ones);
4218 level[0] = 1-((i&4)>>1);
4219 level[1] = 1-((i&2) );
4220 level[2] = 1-((i&1)<<1);
4222 if(trailing_ones<total_coeff) {
/* --- first non-trailing-one level; fast path through cavlc_level_tab,
 *     escape values >= 100 fall back to explicit prefix decoding --- */
4224 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4225 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4226 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4228 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4229 if(level_code >= 100){
4230 prefix= level_code - 100;
4231 if(prefix == LEVEL_TAB_BITS)
4232 prefix += get_level_prefix(gb);
4234 //first coefficient has suffix_length equal to 0 or 1
4235 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4237 level_code= (prefix<<1) + get_bits1(gb); //part
4239 level_code= prefix; //part
4240 }else if(prefix==14){
4242 level_code= (prefix<<1) + get_bits1(gb); //part
4244 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: long escape, 4096 offset per the H.264 spec */
4246 level_code= 30 + get_bits(gb, prefix-3); //part
4248 level_code += (1<<(prefix-3))-4096;
4251 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level via sign-mask trick */
4254 mask= -(level_code&1);
4255 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4257 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4260 if(level_code + 3U > 6U)
4262 level[trailing_ones]= level_code;
4265 //remaining coefficients have suffix_length > 0
4266 for(i=trailing_ones+1;i<total_coeff;i++) {
4267 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4268 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4269 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4271 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4272 if(level_code >= 100){
4273 prefix= level_code - 100;
4274 if(prefix == LEVEL_TAB_BITS){
4275 prefix += get_level_prefix(gb);
4278 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4280 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4282 level_code += (1<<(prefix-3))-4096;
4284 mask= -(level_code&1);
4285 level_code= (((2+level_code)>>1) ^ mask) - mask;
4287 level[i]= level_code;
/* grow suffix_length once |level| exceeds the per-length threshold */
4289 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
/* --- total_zeros: skipped when the block is already full --- */
4294 if(total_coeff == max_coeff)
4297 if(n == CHROMA_DC_BLOCK_INDEX)
4298 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4300 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- scatter levels into block[] in reverse scan order; two copies of
 *     the loop: raw (qmul==NULL path) and dequantizing --- */
4303 coeff_num = zeros_left + total_coeff - 1;
4304 j = scantable[coeff_num];
4306 block[j] = level[0];
4307 for(i=1;i<total_coeff;i++) {
4310 else if(zeros_left < 7){
4311 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4313 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4315 zeros_left -= run_before;
4316 coeff_num -= 1 + run_before;
4317 j= scantable[ coeff_num ];
/* dequantizing variant: (level * qmul[j] + 32) >> 6 */
4322 block[j] = (level[0] * qmul[j] + 32)>>6;
4323 for(i=1;i<total_coeff;i++) {
4326 else if(zeros_left < 7){
4327 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4329 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4331 zeros_left -= run_before;
4332 coeff_num -= 1 + run_before;
4333 j= scantable[ coeff_num ];
4335 block[j]= (level[i] * qmul[j] + 32)>>6;
4340 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Predicts mb_field_decoding_flag for a MB whose flag is not coded
 * (skip runs in MBAFF): inherit the INTERLACED bit from the left
 * neighbour if it is in the same slice, otherwise from the top
 * neighbour. NOTE(review): numbering gap 4353->4355 -- the fallback
 * arm of the conditional (presumably `: 0;`) was dropped by
 * extraction; verify against the original. */
4347 static void predict_field_decoding_flag(H264Context *h){
4348 MpegEncContext * const s = &h->s;
4349 const int mb_xy= h->mb_xy;
4350 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4351 ? s->current_picture.mb_type[mb_xy-1]
4352 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4353 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4355 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4359 * decodes a P_SKIP or B_SKIP macroblock
/* NOTE(review): interior lines are missing here (numbering gaps
 * 4363->4366, 4367->4370, 4375->4377, ...); the declarations of mb_type
 * and mx/my and some control-flow lines are not visible. */
4361 static void decode_mb_skip(H264Context *h){
4362 MpegEncContext * const s = &h->s;
4363 const int mb_xy= h->mb_xy;
/* a skipped MB has no residual: clear its nnz state */
4366 memset(h->non_zero_count[mb_xy], 0, 16);
4367 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4370 mb_type|= MB_TYPE_INTERLACED;
4372 if( h->slice_type_nos == FF_B_TYPE )
4374 // just for fill_caches. pred_direct_motion will set the real mb_type
4375 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4377 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4378 pred_direct_motion(h, &mb_type);
4379 mb_type|= MB_TYPE_SKIP;
/* P_SKIP path: 16x16 partition, ref 0, predicted MV */
4384 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4386 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4387 pred_pskip_motion(h, &mx, &my);
4388 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4389 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4392 write_back_motion(h, mb_type);
4393 s->current_picture.mb_type[mb_xy]= mb_type;
4394 s->current_picture.qscale_table[mb_xy]= s->qscale;
4395 h->slice_table[ mb_xy ]= h->slice_num;
4396 h->prev_mb_skipped= 1;
4400 * decodes a macroblock
4401 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): this function is heavily sampled by the extraction --
 * the embedded numbering skips lines throughout (else/closing-brace
 * lines, some declarations, error returns). All comments below describe
 * only the visible statements; do not treat this text as a compilable
 * unit without consulting the original file. */
4403 static int decode_mb_cavlc(H264Context *h){
4404 MpegEncContext * const s = &h->s;
4406 int partition_count;
4407 unsigned int mb_type, cbp;
4408 int dct8x8_allowed= h->pps.transform_8x8_mode;
4410 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4412 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4413 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only) --- */
4415 if(h->slice_type_nos != FF_I_TYPE){
4416 if(s->mb_skip_run==-1)
4417 s->mb_skip_run= get_ue_golomb(&s->gb);
4419 if (s->mb_skip_run--) {
4420 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4421 if(s->mb_skip_run==0)
4422 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4424 predict_field_decoding_flag(h);
4431 if( (s->mb_y&1) == 0 )
4432 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4435 h->prev_mb_skipped= 0;
/* --- mb_type: ue(v), mapped through per-slice-type tables --- */
4437 mb_type= get_ue_golomb(&s->gb);
4438 if(h->slice_type_nos == FF_B_TYPE){
4440 partition_count= b_mb_type_info[mb_type].partition_count;
4441 mb_type= b_mb_type_info[mb_type].type;
4444 goto decode_intra_mb;
4446 }else if(h->slice_type_nos == FF_P_TYPE){
4448 partition_count= p_mb_type_info[mb_type].partition_count;
4449 mb_type= p_mb_type_info[mb_type].type;
4452 goto decode_intra_mb;
4455 assert(h->slice_type_nos == FF_I_TYPE);
4456 if(h->slice_type == FF_SI_TYPE && mb_type)
4460 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4464 cbp= i_mb_type_info[mb_type].cbp;
4465 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4466 mb_type= i_mb_type_info[mb_type].type;
4470 mb_type |= MB_TYPE_INTERLACED;
4472 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples, byte-aligned --- */
4474 if(IS_INTRA_PCM(mb_type)){
4477 // We assume these blocks are very rare so we do not optimize it.
4478 align_get_bits(&s->gb);
4480 // The pixels are stored in the same order as levels in h->mb array.
4481 for(x=0; x < (CHROMA ? 384 : 256); x++){
4482 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4485 // In deblocking, the quantizer is 0
4486 s->current_picture.qscale_table[mb_xy]= 0;
4487 // All coeffs are present
4488 memset(h->non_zero_count[mb_xy], 16, 16);
4490 s->current_picture.mb_type[mb_xy]= mb_type;
4495 h->ref_count[0] <<= 1;
4496 h->ref_count[1] <<= 1;
4499 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
4502 if(IS_INTRA(mb_type)){
4504 // init_top_left_availability(h);
4505 if(IS_INTRA4x4(mb_type)){
4508 if(dct8x8_allowed && get_bits1(&s->gb)){
4509 mb_type |= MB_TYPE_8x8DCT;
4513 // fill_intra4x4_pred_table(h);
4514 for(i=0; i<16; i+=di){
4515 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag == 0: read 3-bit rem_intra4x4_pred_mode */
4517 if(!get_bits1(&s->gb)){
4518 const int rem_mode= get_bits(&s->gb, 3);
4519 mode = rem_mode + (rem_mode >= mode);
4523 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4525 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4527 write_back_intra_pred_mode(h);
4528 if( check_intra4x4_pred_mode(h) < 0)
4531 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4532 if(h->intra16x16_pred_mode < 0)
4536 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4539 h->chroma_pred_mode= pred_mode;
/* --- 8x8 partitioning: per-sub-block types, refs, MVs --- */
4541 }else if(partition_count==4){
4542 int i, j, sub_partition_count[4], list, ref[2][4];
4544 if(h->slice_type_nos == FF_B_TYPE){
4546 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4547 if(h->sub_mb_type[i] >=13){
4548 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4551 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4552 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4554 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4555 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4556 pred_direct_motion(h, &mb_type);
4557 h->ref_cache[0][scan8[4]] =
4558 h->ref_cache[1][scan8[4]] =
4559 h->ref_cache[0][scan8[12]] =
4560 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4563 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4565 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4566 if(h->sub_mb_type[i] >=4){
4567 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4570 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4571 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list per 8x8 block; 1-ref and 2-ref fast paths */
4575 for(list=0; list<h->list_count; list++){
4576 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4578 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4579 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4583 }else if(ref_count == 2){
4584 tmp= get_bits1(&s->gb)^1;
4586 tmp= get_ue_golomb_31(&s->gb);
4588 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4601 dct8x8_allowed = get_dct8x8_allowed(h);
4603 for(list=0; list<h->list_count; list++){
4605 if(IS_DIRECT(h->sub_mb_type[i])) {
4606 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4609 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4610 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4612 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4613 const int sub_mb_type= h->sub_mb_type[i];
4614 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4615 for(j=0; j<sub_partition_count[i]; j++){
4617 const int index= 4*i + block_width*j;
4618 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4619 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4620 mx += get_se_golomb(&s->gb);
4621 my += get_se_golomb(&s->gb);
4622 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate MV into the cache footprint of the sub-partition shape */
4624 if(IS_SUB_8X8(sub_mb_type)){
4626 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4628 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4629 }else if(IS_SUB_8X4(sub_mb_type)){
4630 mv_cache[ 1 ][0]= mx;
4631 mv_cache[ 1 ][1]= my;
4632 }else if(IS_SUB_4X8(sub_mb_type)){
4633 mv_cache[ 8 ][0]= mx;
4634 mv_cache[ 8 ][1]= my;
4636 mv_cache[ 0 ][0]= mx;
4637 mv_cache[ 0 ][1]= my;
4640 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4646 }else if(IS_DIRECT(mb_type)){
4647 pred_direct_motion(h, &mb_type);
4648 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
4650 int list, mx, my, i;
4651 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4652 if(IS_16X16(mb_type)){
4653 for(list=0; list<h->list_count; list++){
4655 if(IS_DIR(mb_type, 0, list)){
4656 if(h->ref_count[list]==1){
4658 }else if(h->ref_count[list]==2){
4659 val= get_bits1(&s->gb)^1;
4661 val= get_ue_golomb_31(&s->gb);
4662 if(val >= h->ref_count[list]){
4663 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4668 val= LIST_NOT_USED&0xFF;
4669 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4671 for(list=0; list<h->list_count; list++){
4673 if(IS_DIR(mb_type, 0, list)){
4674 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4675 mx += get_se_golomb(&s->gb);
4676 my += get_se_golomb(&s->gb);
4677 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4679 val= pack16to32(mx,my);
4682 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4685 else if(IS_16X8(mb_type)){
4686 for(list=0; list<h->list_count; list++){
4689 if(IS_DIR(mb_type, i, list)){
4690 if(h->ref_count[list] == 1){
4692 }else if(h->ref_count[list] == 2){
4693 val= get_bits1(&s->gb)^1;
4695 val= get_ue_golomb_31(&s->gb);
4696 if(val >= h->ref_count[list]){
4697 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4702 val= LIST_NOT_USED&0xFF;
4703 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4706 for(list=0; list<h->list_count; list++){
4709 if(IS_DIR(mb_type, i, list)){
4710 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4711 mx += get_se_golomb(&s->gb);
4712 my += get_se_golomb(&s->gb);
4713 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4715 val= pack16to32(mx,my);
4718 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4722 assert(IS_8X16(mb_type));
4723 for(list=0; list<h->list_count; list++){
4726 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4727 if(h->ref_count[list]==1){
4729 }else if(h->ref_count[list]==2){
4730 val= get_bits1(&s->gb)^1;
4732 val= get_ue_golomb_31(&s->gb);
4733 if(val >= h->ref_count[list]){
4734 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4739 val= LIST_NOT_USED&0xFF;
4740 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4743 for(list=0; list<h->list_count; list++){
4746 if(IS_DIR(mb_type, i, list)){
4747 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4748 mx += get_se_golomb(&s->gb);
4749 my += get_se_golomb(&s->gb);
4750 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4752 val= pack16to32(mx,my);
4755 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4761 if(IS_INTER(mb_type))
4762 write_back_motion(h, mb_type);
/* --- coded_block_pattern (not present for Intra_16x16) --- */
4764 if(!IS_INTRA16x16(mb_type)){
4765 cbp= get_ue_golomb(&s->gb);
4767 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4772 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4773 else cbp= golomb_to_inter_cbp [cbp];
4775 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4776 else cbp= golomb_to_inter_cbp_gray[cbp];
4781 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4782 if(get_bits1(&s->gb)){
4783 mb_type |= MB_TYPE_8x8DCT;
4784 h->cbp_table[mb_xy]= cbp;
4787 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: mb_qp_delta then per-block CAVLC coefficients --- */
4789 if(cbp || IS_INTRA16x16(mb_type)){
4790 int i8x8, i4x4, chroma_idx;
4792 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4793 const uint8_t *scan, *scan8x8, *dc_scan;
4795 // fill_non_zero_count_cache(h);
4797 if(IS_INTERLACED(mb_type)){
4798 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4799 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4800 dc_scan= luma_dc_field_scan;
4802 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4803 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4804 dc_scan= luma_dc_zigzag_scan;
4807 dquant= get_se_golomb(&s->gb);
4809 if( dquant > 25 || dquant < -26 ){
4810 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* QP wraps modulo 52 per the spec */
4814 s->qscale += dquant;
4815 if(((unsigned)s->qscale) > 51){
4816 if(s->qscale<0) s->qscale+= 52;
4817 else s->qscale-= 52;
4820 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4821 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4822 if(IS_INTRA16x16(mb_type)){
4823 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4824 return -1; //FIXME continue if partitioned and other return -1 too
4827 assert((cbp&15) == 0 || (cbp&15) == 15);
4830 for(i8x8=0; i8x8<4; i8x8++){
4831 for(i4x4=0; i4x4<4; i4x4++){
4832 const int index= i4x4 + 4*i8x8;
4833 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4839 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4842 for(i8x8=0; i8x8<4; i8x8++){
4843 if(cbp & (1<<i8x8)){
4844 if(IS_8x8DCT(mb_type)){
4845 DCTELEM *buf = &h->mb[64*i8x8];
4847 for(i4x4=0; i4x4<4; i4x4++){
4848 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4849 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4852 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4853 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4855 for(i4x4=0; i4x4<4; i4x4++){
4856 const int index= i4x4 + 4*i8x8;
4858 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4864 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4865 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks (qmul==NULL, dequant later), then AC blocks */
4871 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4872 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4878 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4879 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4880 for(i4x4=0; i4x4<4; i4x4++){
4881 const int index= 16 + 4*chroma_idx + i4x4;
4882 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4888 uint8_t * const nnz= &h->non_zero_count_cache[0];
4889 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4890 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4893 uint8_t * const nnz= &h->non_zero_count_cache[0];
4894 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4895 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4896 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4898 s->current_picture.qscale_table[mb_xy]= s->qscale;
4899 write_back_non_zero_count(h);
4902 h->ref_count[0] >>= 1;
4903 h->ref_count[1] >>= 1;
/* Decodes mb_field_decoding_flag (CABAC). The context index (0..2)
 * counts how many of the left / above MB pairs in the same slice are
 * already field-coded. NOTE(review): the ctx increments themselves fall
 * in elided lines (gaps 4918->4921, 4921->4925). */
4909 static int decode_cabac_field_decoding_flag(H264Context *h) {
4910 MpegEncContext * const s = &h->s;
4911 const int mb_x = s->mb_x;
4912 const int mb_y = s->mb_y & ~1;
4913 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4914 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4916 unsigned int ctx = 0;
4918 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4921 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4925 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra mb_type (CABAC): returns 0 for I_4x4, 25 for I_PCM,
 * otherwise 1..24 encoding the I_16x16 variant (cbp_luma, cbp_chroma
 * and prediction mode folded into the value). intra_slice selects
 * neighbour-based context modelling. NOTE(review): several lines are
 * missing (gaps 4929->4933, 4937->4939, 4942->4945, 4946->4949). */
4928 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4929 uint8_t *state= &h->cabac_state[ctx_base];
4933 MpegEncContext * const s = &h->s;
4934 const int mba_xy = h->left_mb_xy[0];
4935 const int mbb_xy = h->top_mb_xy;
4937 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4939 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4941 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4942 return 0; /* I4x4 */
4945 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4946 return 0; /* I4x4 */
/* terminate bin distinguishes I_PCM from I_16x16 */
4949 if( get_cabac_terminate( &h->cabac ) )
4950 return 25; /* PCM */
4952 mb_type = 1; /* I16x16 */
4953 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4954 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4955 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4956 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4957 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes mb_type for B slices (CABAC). Context 27+ctx depends on how
 * many same-slice neighbours are non-direct. Returns the b_mb_type_info
 * index: 0 = B_Direct_16x16, 1/2 = B_L0/L1_16x16, then bit-pattern
 * decoded variants; bits==13 escapes to the intra mb_type decoder. */
4961 static int decode_cabac_mb_type_b( H264Context *h ) {
4962 MpegEncContext * const s = &h->s;
4964 const int mba_xy = h->left_mb_xy[0];
4965 const int mbb_xy = h->top_mb_xy;
4968 assert(h->slice_type_nos == FF_B_TYPE);
4970 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4972 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4975 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4976 return 0; /* B_Direct_16x16 */
4978 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4979 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4982 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4983 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4984 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4985 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4987 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4988 else if( bits == 13 ) {
4989 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4990 } else if( bits == 14 )
4991 return 11; /* B_L1_L0_8x16 */
4992 else if( bits == 15 )
4993 return 22; /* B_8x8 */
4995 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4996 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/* Decodes mb_skip_flag (CABAC). Context counts same-slice non-skipped
 * left/top neighbours; under MBAFF the neighbour MB addresses are
 * adjusted for field/frame pairing. NOTE(review): several lines elided
 * (gaps 5000->5004, 5005->5008, 5016->5018, 5022->5025, 5030->5032). */
4999 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5000 MpegEncContext * const s = &h->s;
5004 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5005 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5008 && h->slice_table[mba_xy] == h->slice_num
5009 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5010 mba_xy += s->mb_stride;
5012 mbb_xy = mb_xy - s->mb_stride;
5014 && h->slice_table[mbb_xy] == h->slice_num
5015 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5016 mbb_xy -= s->mb_stride;
5018 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5020 int mb_xy = h->mb_xy;
5022 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5025 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5027 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
/* B slices use a different context base (offset applied on an elided line) */
5030 if( h->slice_type_nos == FF_B_TYPE )
5032 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes intra4x4 pred mode (CABAC): first bin selects "use predicted
 * mode" (return path elided); otherwise 3 bins build rem_mode, which is
 * bumped past the predicted mode to avoid coding it twice. */
5035 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5038 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5041 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5042 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5043 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5045 if( mode >= pred_mode )
/* Decodes intra_chroma_pred_mode (CABAC, truncated unary max 3).
 * Context depends on whether same-slice neighbours used a nonzero
 * chroma mode; returns decoded on elided lines (gaps 5064->5067 etc.). */
5051 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5052 const int mba_xy = h->left_mb_xy[0];
5053 const int mbb_xy = h->top_mb_xy;
5057 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5058 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5061 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5064 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5067 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5069 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decodes the 4-bit luma coded_block_pattern (CABAC): one bin per 8x8
 * block, each context built from the corresponding bits of the left
 * (cbp_a) and top (cbp_b) neighbour CBPs, or of bits already decoded
 * for this MB. Neighbours outside the slice contribute -1 (all-ones). */
5075 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5076 int cbp_b, cbp_a, ctx, cbp = 0;
5078 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5079 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5081 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5082 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5083 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5084 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5085 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5086 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5087 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5088 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decodes chroma CBP (CABAC): 0 = none, 1 = DC only, 2 = DC+AC.
 * First bin tests "any chroma coeffs", second bin (shifted contexts)
 * tests "AC present"; contexts derive from neighbour chroma CBPs. */
5091 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5095 cbp_a = (h->left_cbp>>4)&0x03;
5096 cbp_b = (h-> top_cbp>>4)&0x03;
5099 if( cbp_a > 0 ) ctx++;
5100 if( cbp_b > 0 ) ctx += 2;
5101 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5105 if( cbp_a == 2 ) ctx++;
5106 if( cbp_b == 2 ) ctx += 2;
5107 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes mb_qp_delta (CABAC): unary `val` with a 102 cap against
 * corrupt streams, then unfolded to a signed delta (even->positive,
 * odd->negative per the (val+1)>>1 mapping). */
5109 static int decode_cabac_mb_dqp( H264Context *h) {
5110 int ctx= h->last_qscale_diff != 0;
5113 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5116 if(val > 102) //prevent infinite loop
5121 return (val + 1)>>1 ;
5123 return -((val + 1)>>1);
/* Decodes P-slice sub_mb_type (CABAC), 3-bin tree over contexts 21-23.
 * NOTE(review): the return statements between bins are on elided lines
 * (gaps 5126->5128, 5130->5134); only the branching skeleton is visible. */
5125 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5126 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5128 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5130 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes B-slice sub_mb_type (CABAC) via the context 36-39 tree:
 * 0 = B_Direct_8x8, 1/2 = B_L0/L1_8x8, larger values built from
 * additional bins on context 39. */
5134 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5136 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5137 return 0; /* B_Direct_8x8 */
5138 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5139 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5141 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5142 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5143 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5146 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5147 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes transform_size_8x8_flag (CABAC); context 399 + count of
 * neighbouring MBs that already use the 8x8 transform. */
5151 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5152 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decodes ref_idx (CABAC, unary on contexts 54+). Context derives from
 * the left/top cached ref indices; in B slices direct neighbours are
 * treated as ref 0 for context purposes. The unary loop is capped at
 * 32 to stop corrupt streams. NOTE(review): the ctx computation and
 * loop body lines are partially elided (gaps 5157->5161, 5164->5173). */
5155 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5156 int refa = h->ref_cache[list][scan8[n] - 1];
5157 int refb = h->ref_cache[list][scan8[n] - 8];
5161 if( h->slice_type_nos == FF_B_TYPE) {
5162 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5164 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5173 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5176 if(ref >= 32 /*h->ref_list[list]*/){
/* Decodes one mvd component (CABAC): context from the sum of neighbour
 * |mvd|, unary bins up to 9 on adaptive contexts, then Exp-Golomb-style
 * bypass suffix with an overflow guard, and a bypass sign bit. */
5183 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5184 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5185 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5186 int ctxbase = (l == 0) ? 40 : 47;
5188 int ctx = (amvd>2) + (amvd>32);
5190 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5195 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix for large magnitudes */
5203 while( get_cabac_bypass( &h->cabac ) ) {
5207 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5212 if( get_cabac_bypass( &h->cabac ) )
5216 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Builds the coded_block_flag context: nza/nzb are the neighbour
 * "has coefficients" indicators -- taken from left/top CBP bits for DC
 * categories, or from the nnz cache for AC/4x4 categories -- and the
 * final context is ctx + 4*cat (ctx combination lines elided,
 * gaps 5226->5228, 5234->5243). */
5219 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5225 nza = h->left_cbp&0x100;
5226 nzb = h-> top_cbp&0x100;
5228 nza = (h->left_cbp>>(6+idx))&0x01;
5229 nzb = (h-> top_cbp>>(6+idx))&0x01;
5232 assert(cat == 1 || cat == 2 || cat == 4);
5233 nza = h->non_zero_count_cache[scan8[idx] - 1];
5234 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5243 return ctx + 4 * cat;
/* Maps an 8x8-block scan position (0..62) to the context offset used
 * for last_significant_coeff_flag in CABAC 8x8 residual decoding.
 * NOTE(review): the closing `};` appears to be on an elided line. */
5246 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5247 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5248 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5249 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5250 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode one CABAC-coded residual block into @p block.
 * Significance map, last-coefficient flags and coefficient levels are read
 * per H.264 spec 9.3; @p is_dc selects the DC-specific context tables so the
 * compiler can specialize both variants of this always-inline function.
 * @param cat       residual category (see table in the body, 0..5)
 * @param n         block index (meaning depends on cat)
 * @param scantable zigzag/field scan for this block size
 * @param qmul      dequant table, or NULL for DC blocks (no dequant here)
 * @param max_coeff number of coefficients in the block
 */
5253 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
/* ctx offsets indexed by [MB_FIELD][cat]: frame-coded vs field-coded
 * macroblocks use different context sets */
5254 static const int significant_coeff_flag_offset[2][6] = {
5255 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5256 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5258 static const int last_coeff_flag_offset[2][6] = {
5259 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5260 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5262 static const int coeff_abs_level_m1_offset[6] = {
5263 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-scan-position ctx increments for 8x8 significance, [field][pos] */
5265 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5266 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5267 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5268 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5269 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5270 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5271 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5272 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5273 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5275     /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5276      * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5277      * map node ctx => cabac ctx for level=1 */
5278 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5279     /* map node ctx => cabac ctx for level>1 */
5280 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5281 static const uint8_t coeff_abs_level_transition[2][8] = {
5282     /* update node ctx after decoding a level=1 */
5283 { 1, 2, 3, 3, 4, 5, 6, 7 },
5284     /* update node ctx after decoding a level>1 */
5285 { 4, 4, 4, 4, 5, 6, 7, 7 }
5291 int coeff_count = 0;
5294 uint8_t *significant_coeff_ctx_base;
5295 uint8_t *last_coeff_ctx_base;
5296 uint8_t *abs_level_m1_ctx_base;
/* work on a stack copy of the CABAC state so the compiler can keep it in
 * registers; written back to h->cabac before every return */
5299 #define CABAC_ON_STACK
5301 #ifdef CABAC_ON_STACK
5304     cc.range     = h->cabac.range;
5305     cc.low       = h->cabac.low;
5306     cc.bytestream= h->cabac.bytestream;
5308 #define CC &h->cabac
5312     /* cat: 0-> DC 16x16  n = 0
5313      *      1-> AC 16x16  n = luma4x4idx
5314      *      2-> Luma4x4   n = luma4x4idx
5315      *      3-> DC Chroma n = iCbCr
5316      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5317      *      5-> Luma8x8   n = 4 * luma8x8idx
5320     /* read coded block flag */
5321 if( is_dc || cat != 5 ) {
5322 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* block not coded: clear the nnz cache entry and bail out */
5324 h->non_zero_count_cache[scan8[n]] = 0;
5326 #ifdef CABAC_ON_STACK
5327             h->cabac.range     = cc.range     ;
5328             h->cabac.low       = cc.low       ;
5329             h->cabac.bytestream= cc.bytestream;
5335 significant_coeff_ctx_base = h->cabac_state
5336 + significant_coeff_flag_offset[MB_FIELD][cat];
5337 last_coeff_ctx_base = h->cabac_state
5338 + last_coeff_flag_offset[MB_FIELD][cat];
5339 abs_level_m1_ctx_base = h->cabac_state
5340 + coeff_abs_level_m1_offset[cat];
5342 if( !is_dc && cat == 5 ) {
/* decode the significance map; records the scan positions of non-zero
 * coefficients into index[] and stops early after the "last" flag */
5343 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5344         for(last= 0; last < coefs; last++) { \
5345             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5346             if( get_cabac( CC, sig_ctx )) { \
5347                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5348                 index[coeff_count++] = last; \
5349                 if( get_cabac( CC, last_ctx ) ) { \
/* the final scan position has no significance flag of its own */
5355         if( last == max_coeff -1 ) {\
5356             index[coeff_count++] = last;\
5358 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* hand-written x86 significance decoders when the required registers are
 * available; otherwise the generic C macro */
5359 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5360 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5362 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5364 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5366 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5369 assert(coeff_count > 0);
/* record coded status: DC flag in bit 8 of cbp_table, 8x8 luma per-block
 * bits at 0x40<<n, 4x4 blocks via the nnz cache */
5373 h->cbp_table[h->mb_xy] |= 0x100;
5375 h->cbp_table[h->mb_xy] |= 0x40 << n;
5378 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5380 assert( cat == 1 || cat == 2 || cat == 4 );
5381 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* decode coefficient levels from the last significant coefficient
 * backwards, tracking the node context state machine */
5386 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5388 int j= scantable[index[--coeff_count]];
5390 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1 */
5391 node_ctx = coeff_abs_level_transition[0][node_ctx];
5393 block[j] = get_cabac_bypass_sign( CC, -1);
/* dequant with rounding: (level * qmul + 32) >> 6 */
5395 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* |level| > 1: unary part up to 14, then bypass-coded suffix */
5399 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5400 node_ctx = coeff_abs_level_transition[1][node_ctx];
5402 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5406 if( coeff_abs >= 15 ) {
5408 while( get_cabac_bypass( CC ) ) {
5414 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5420 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5422 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5425     } while( coeff_count );
/* flush the register-cached CABAC state back into the context */
5426 #ifdef CABAC_ON_STACK
5427             h->cabac.range     = cc.range     ;
5428             h->cabac.low       = cc.low       ;
5429             h->cabac.bytestream= cc.bytestream;
/* Non-inlined wrapper: decode a DC residual block (is_dc=1 specialization). */
5435 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5436     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-inlined wrapper: decode an AC/luma/chroma residual block (is_dc=0). */
5439 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5440     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatch a residual decode to the DC or non-DC specialization.
 * cat 0 (luma DC) and cat 3 (chroma DC) are the DC categories.
 * NOTE(review): both a direct inlined call and a wrapper dispatch appear
 * here — presumably selected by an elided preprocessor conditional
 * (code-size vs speed trade-off); confirm against the full file. */
5444 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5446     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5448     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5449     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/**
 * Compute the macroblock addresses of the top and left neighbours,
 * accounting for MBAFF macroblock pairs and field pictures.
 * Results are stored in h->top_mb_xy and h->left_mb_xy[0].
 */
5453 static inline void compute_mb_neighbors(H264Context *h)
5455     MpegEncContext * const s = &h->s;
5456     const int mb_xy = h->mb_xy;
/* default frame-coded neighbours: directly above and to the left */
5457     h->top_mb_xy     = mb_xy - s->mb_stride;
5458     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are addressed per macroblock *pair* */
5460         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5461         const int top_pair_xy      = pair_xy     - s->mb_stride;
5462         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5463         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5464         const int curr_mb_field_flag = MB_FIELD;
5465         const int bottom = (s->mb_y & 1);
/* field MB: the top neighbour is the top MB of the pair above */
5467         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5468             h->top_mb_xy -= s->mb_stride;
/* left neighbour comes from the pair when field/frame coding differs */
5470         if (!left_mb_field_flag == curr_mb_field_flag) {
5471             h->left_mb_xy[0] = pair_xy - 1;
5473     } else if (FIELD_PICTURE) {
5474         h->top_mb_xy -= s->mb_stride;
5480  * decodes a macroblock
5481  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5483 static int decode_mb_cabac(H264Context *h) {
5484     MpegEncContext * const s = &h->s;
5486     int mb_type, partition_count, cbp = 0;
5487     int dct8x8_allowed= h->pps.transform_8x8_mode;
5489     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5491     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    /* ---- skip flag (P/B slices only) ---- */
5492     if( h->slice_type_nos != FF_I_TYPE ) {
5494         /* a skipped mb needs the aff flag from the following mb */
5495         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5496             predict_field_decoding_flag(h);
5497         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5498             skip = h->next_mb_skipped;
5500             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5501         /* read skip flags */
5503             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5504                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5505                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5506                 if(!h->next_mb_skipped)
5507                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset the per-MB decode state and return via elided path */
5512             h->cbp_table[mb_xy] = 0;
5513             h->chroma_pred_mode_table[mb_xy] = 0;
5514             h->last_qscale_diff = 0;
5521         if( (s->mb_y&1) == 0 )
5523                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5526     h->prev_mb_skipped = 0;
5528     compute_mb_neighbors(h);
    /* ---- macroblock type ---- */
5530     if( h->slice_type_nos == FF_B_TYPE ) {
5531         mb_type = decode_cabac_mb_type_b( h );
5533             partition_count= b_mb_type_info[mb_type].partition_count;
5534             mb_type=         b_mb_type_info[mb_type].type;
5537             goto decode_intra_mb;
5539     } else if( h->slice_type_nos == FF_P_TYPE ) {
5540         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5542             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5543                 /* P_L0_D16x16, P_8x8 */
5544                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5546                 /* P_L0_D8x16, P_L0_D16x8 */
5547                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5549             partition_count= p_mb_type_info[mb_type].partition_count;
5550             mb_type=         p_mb_type_info[mb_type].type;
5552             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5553             goto decode_intra_mb;
5556         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5557         if(h->slice_type == FF_SI_TYPE && mb_type)
5559         assert(h->slice_type_nos == FF_I_TYPE);
5561         partition_count = 0;
5562         cbp= i_mb_type_info[mb_type].cbp;
5563         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5564         mb_type= i_mb_type_info[mb_type].type;
5567         mb_type |= MB_TYPE_INTERLACED;
5569     h->slice_table[ mb_xy ]= h->slice_num;
    /* ---- I_PCM: raw samples follow, bypassing CABAC ---- */
5571     if(IS_INTRA_PCM(mb_type)) {
5574         // We assume these blocks are very rare so we do not optimize it.
5575         // FIXME The two following lines get the bitstream position in the cabac
5576         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5577         ptr= h->cabac.bytestream;
5578         if(h->cabac.low&0x1) ptr--;
5580             if(h->cabac.low&0x1FF) ptr--;
5583         // The pixels are stored in the same order as levels in h->mb array.
5584         memcpy(h->mb, ptr, 256); ptr+=256;
5586             memcpy(h->mb+128, ptr, 128); ptr+=128;
/* restart the CABAC engine after the raw PCM payload */
5589         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5591         // All blocks are present
5592         h->cbp_table[mb_xy] = 0x1ef;
5593         h->chroma_pred_mode_table[mb_xy] = 0;
5594         // In deblocking, the quantizer is 0
5595         s->current_picture.qscale_table[mb_xy]= 0;
5596         // All coeffs are present
5597         memset(h->non_zero_count[mb_xy], 16, 16);
5598         s->current_picture.mb_type[mb_xy]= mb_type;
5599         h->last_qscale_diff = 0;
/* MBAFF pairs decode with doubled ref counts; restored before return */
5604         h->ref_count[0] <<= 1;
5605         h->ref_count[1] <<= 1;
5608     fill_caches(h, mb_type, 0);
    /* ---- intra prediction modes ---- */
5610     if( IS_INTRA( mb_type ) ) {
5612         if( IS_INTRA4x4( mb_type ) ) {
5613             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5614                 mb_type |= MB_TYPE_8x8DCT;
5615                 for( i = 0; i < 16; i+=4 ) {
5616                     int pred = pred_intra_mode( h, i );
5617                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5618                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5621                 for( i = 0; i < 16; i++ ) {
5622                     int pred = pred_intra_mode( h, i );
5623                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5625                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5628             write_back_intra_pred_mode(h);
5629             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5631             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5632             if( h->intra16x16_pred_mode < 0 ) return -1;
5635         h->chroma_pred_mode_table[mb_xy] =
5636         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5638         pred_mode= check_intra_pred_mode( h, pred_mode );
5639         if( pred_mode < 0 ) return -1;
5640         h->chroma_pred_mode= pred_mode;
    /* ---- 8x8 sub-partitioned inter MB ---- */
5642     } else if( partition_count == 4 ) {
5643         int i, j, sub_partition_count[4], list, ref[2][4];
5645         if( h->slice_type_nos == FF_B_TYPE ) {
5646             for( i = 0; i < 4; i++ ) {
5647                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5648                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5649                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5651             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5652                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5653                 pred_direct_motion(h, &mb_type);
5654                 h->ref_cache[0][scan8[4]] =
5655                 h->ref_cache[1][scan8[4]] =
5656                 h->ref_cache[0][scan8[12]] =
5657                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5658                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5659                     for( i = 0; i < 4; i++ )
5660                         if( IS_DIRECT(h->sub_mb_type[i]) )
5661                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5665             for( i = 0; i < 4; i++ ) {
5666                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5667                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5668                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
        /* reference indices for each non-direct 8x8 sub-block */
5672         for( list = 0; list < h->list_count; list++ ) {
5673                 for( i = 0; i < 4; i++ ) {
5674                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5675                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5676                         if( h->ref_count[list] > 1 ){
5677                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5678                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5679                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5687                                                     h->ref_cache[list][ scan8[4*i]+1 ]=
5688                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5693             dct8x8_allowed = get_dct8x8_allowed(h);
        /* motion vectors per sub-partition */
5695         for(list=0; list<h->list_count; list++){
5697                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5698                 if(IS_DIRECT(h->sub_mb_type[i])){
5699                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5703                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5704                     const int sub_mb_type= h->sub_mb_type[i];
5705                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5706                     for(j=0; j<sub_partition_count[i]; j++){
5709                         const int index= 4*i + block_width*j;
5710                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5711                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5712                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5714                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5715                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5716                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into all cache cells the sub-block covers */
5718                         if(IS_SUB_8X8(sub_mb_type)){
5720                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5722                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5725                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5727                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5728                         }else if(IS_SUB_8X4(sub_mb_type)){
5729                             mv_cache[ 1 ][0]= mx;
5730                             mv_cache[ 1 ][1]= my;
5732                             mvd_cache[ 1 ][0]= mx - mpx;
5733                             mvd_cache[ 1 ][1]= my - mpy;
5734                         }else if(IS_SUB_4X8(sub_mb_type)){
5735                             mv_cache[ 8 ][0]= mx;
5736                             mv_cache[ 8 ][1]= my;
5738                             mvd_cache[ 8 ][0]= mx - mpx;
5739                             mvd_cache[ 8 ][1]= my - mpy;
5741                         mv_cache[ 0 ][0]= mx;
5742                         mv_cache[ 0 ][1]= my;
5744                         mvd_cache[ 0 ][0]= mx - mpx;
5745                         mvd_cache[ 0 ][1]= my - mpy;
5748                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5749                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5750                     p[0] = p[1] = p[8] = p[9] = 0;
5751                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
    /* ---- B-direct MB: no mv/ref data in the stream ---- */
5755     } else if( IS_DIRECT(mb_type) ) {
5756         pred_direct_motion(h, &mb_type);
5757         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5758         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5759         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    /* ---- 16x16 / 16x8 / 8x16 inter MB ---- */
5761         int list, mx, my, i, mpx, mpy;
5762         if(IS_16X16(mb_type)){
5763             for(list=0; list<h->list_count; list++){
5764                 if(IS_DIR(mb_type, 0, list)){
5766                     if(h->ref_count[list] > 1){
5767                         ref= decode_cabac_mb_ref(h, list, 0);
5768                         if(ref >= (unsigned)h->ref_count[list]){
5769                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5774                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5776                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5778             for(list=0; list<h->list_count; list++){
5779                 if(IS_DIR(mb_type, 0, list)){
5780                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5782                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5783                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5784                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5786                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5787                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5789                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5792         else if(IS_16X8(mb_type)){
5793             for(list=0; list<h->list_count; list++){
5795                         if(IS_DIR(mb_type, i, list)){
5797                             if(h->ref_count[list] > 1){
5798                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5799                                 if(ref >= (unsigned)h->ref_count[list]){
5800                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5805                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5807                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5810             for(list=0; list<h->list_count; list++){
5812                     if(IS_DIR(mb_type, i, list)){
5814                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5814                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5815                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5816                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5818                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5819                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5821                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5822                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5827             assert(IS_8X16(mb_type));
5828             for(list=0; list<h->list_count; list++){
5830                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5832                             if(h->ref_count[list] > 1){
5833                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5834                                 if(ref >= (unsigned)h->ref_count[list]){
5835                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5840                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5842                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5845             for(list=0; list<h->list_count; list++){
5847                     if(IS_DIR(mb_type, i, list)){
5848                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5849                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5850                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5852                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5853                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5854                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5856                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5857                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5864    if( IS_INTER( mb_type ) ) {
5865         h->chroma_pred_mode_table[mb_xy] = 0;
5866         write_back_motion( h, mb_type );
    /* ---- coded block pattern ---- */
5869     if( !IS_INTRA16x16( mb_type ) ) {
5870         cbp  = decode_cabac_mb_cbp_luma( h );
5872             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5875     h->cbp_table[mb_xy] = h->cbp = cbp;
5877     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5878         if( decode_cabac_mb_transform_size( h ) )
5879             mb_type |= MB_TYPE_8x8DCT;
5881     s->current_picture.mb_type[mb_xy]= mb_type;
    /* ---- residuals ---- */
5883     if( cbp || IS_INTRA16x16( mb_type ) ) {
5884         const uint8_t *scan, *scan8x8, *dc_scan;
5885         const uint32_t *qmul;
/* *_q0 variants avoid a qscale==0 special case in the scan tables */
5888         if(IS_INTERLACED(mb_type)){
5889             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5890             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5891             dc_scan= luma_dc_field_scan;
5893             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5894             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5895             dc_scan= luma_dc_zigzag_scan;
5898         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5899         if( dqp == INT_MIN ){
5900             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec */
5904         if(((unsigned)s->qscale) > 51){
5905             if(s->qscale<0) s->qscale+= 52;
5906             else            s->qscale-= 52;
5908         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5909         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5911         if( IS_INTRA16x16( mb_type ) ) {
5913             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5914             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5917                 qmul = h->dequant4_coeff[0][s->qscale];
5918                 for( i = 0; i < 16; i++ ) {
5919                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5920                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5923                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5927             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5928                 if( cbp & (1<<i8x8) ) {
5929                     if( IS_8x8DCT(mb_type) ) {
5930                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5931                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5933                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5934                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5935                             const int index = 4*i8x8 + i4x4;
5936                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5938                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5939 //STOP_TIMER("decode_residual")
5943                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5944                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5951             for( c = 0; c < 2; c++ ) {
5952                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5953                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5959             for( c = 0; c < 2; c++ ) {
5960                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5961                 for( i = 0; i < 4; i++ ) {
5962                     const int index = 16 + 4 * c + i;
5963                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5964                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5968             uint8_t * const nnz= &h->non_zero_count_cache[0];
5969             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5970             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residuals at all: clear the whole nnz cache and the dqp history */
5973         uint8_t * const nnz= &h->non_zero_count_cache[0];
5974         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5975         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5976         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5977         h->last_qscale_diff = 0;
5980     s->current_picture.qscale_table[mb_xy]= s->qscale;
5981     write_back_non_zero_count(h);
/* undo the MBAFF ref-count doubling done above */
5984         h->ref_count[0] >>= 1;
5985         h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge. bS[4] holds the boundary strength per
 * 4-pixel group; bS==4 (intra edge) selects the strong filter variant. */
5992 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5993     const int index_a = qp + h->slice_alpha_c0_offset;
/* alpha/beta thresholds looked up with a +52 bias so negative qp offsets index safely */
5994     const int alpha = (alpha_table+52)[index_a];
5995     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5999         tc[0] = (tc0_table+52)[index_a][bS[0]];
6000         tc[1] = (tc0_table+52)[index_a][bS[1]];
6001         tc[2] = (tc0_table+52)[index_a][bS[2]];
6002         tc[3] = (tc0_table+52)[index_a][bS[3]];
6003         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6005         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one vertical chroma edge. Chroma tc0 values are biased by +1
 * (the chroma filter clips with tc = tc0+1 per the spec). */
6008 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6009     const int index_a = qp + h->slice_alpha_c0_offset;
6010     const int alpha = (alpha_table+52)[index_a];
6011     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6015         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6016         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6017         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6018         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6019         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6021         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the vertical luma edge between an MBAFF macroblock pair and its
 * left neighbour, one pixel row at a time (the field/frame mix prevents
 * using the 4-row dsp filters). qp[2] holds the qp per field. */
6025 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6027     for( i = 0; i < 16; i++, pix += stride) {
/* bS has 8 entries: index by row pair, plus the parity bit when field coded */
6033         int bS_index = (i >> 1);
6036             bS_index |= (i & 1);
6039         if( bS[bS_index] == 0 ) {
/* field MBs: top/bottom halves use different qp; frame MBs alternate per row */
6043         qp_index = MB_FIELD ? (i >> 3) : (i&1);
6044         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6045         alpha = (alpha_table+52)[index_a];
6046         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal (bS<4) filter: clip the delta to +-tc */
6048         if( bS[bS_index] < 4 ) {
6049             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6050             const int p0 = pix[-1];
6051             const int p1 = pix[-2];
6052             const int p2 = pix[-3];
6053             const int q0 = pix[0];
6054             const int q1 = pix[1];
6055             const int q2 = pix[2];
6057             if( FFABS( p0 - q0 ) < alpha &&
6058                 FFABS( p1 - p0 ) < beta &&
6059                 FFABS( q1 - q0 ) < beta ) {
6063                 if( FFABS( p2 - p0 ) < beta ) {
6064                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6067                 if( FFABS( q2 - q0 ) < beta ) {
6068                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6072                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6073                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6074                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6075             tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (bS==4, intra) filter */
6078             const int p0 = pix[-1];
6079             const int p1 = pix[-2];
6080             const int p2 = pix[-3];
6082             const int q0 = pix[0];
6083             const int q1 = pix[1];
6084             const int q2 = pix[2];
6086             if( FFABS( p0 - q0 ) < alpha &&
6087                 FFABS( p1 - p0 ) < beta &&
6088                 FFABS( q1 - q0 ) < beta ) {
6090                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6091                     if( FFABS( p2 - p0 ) < beta)
6093                         const int p3 = pix[-4];
6095                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6096                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6097                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6100                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6102                     if( FFABS( q2 - q0 ) < beta)
6104                         const int q3 = pix[3];
6106                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6107                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6108                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6111                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge not smooth enough for the full strong filter: light smoothing only */
6115                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6116                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6118                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: deblock the vertical chroma
 * edge of an MBAFF pair row by row (8 chroma rows). */
6123 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6125     for( i = 0; i < 8; i++, pix += stride) {
6133         if( bS[bS_index] == 0 ) {
/* field MBs: half-height split; frame MBs alternate qp per row */
6137         qp_index = MB_FIELD ? (i >> 2) : (i&1);
6138         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6139         alpha = (alpha_table+52)[index_a];
6140         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6142         if( bS[bS_index] < 4 ) {
/* chroma normal filter uses tc = tc0 + 1 */
6143             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6144             const int p0 = pix[-1];
6145             const int p1 = pix[-2];
6146             const int q0 = pix[0];
6147             const int q1 = pix[1];
6149             if( FFABS( p0 - q0 ) < alpha &&
6150                 FFABS( p1 - p0 ) < beta &&
6151                 FFABS( q1 - q0 ) < beta ) {
6152                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6154                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6155                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6156             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong (intra-edge) chroma filter: simple 3-tap smoothing */
6159             const int p0 = pix[-1];
6160             const int p1 = pix[-2];
6161             const int q0 = pix[0];
6162             const int q1 = pix[1];
6164             if( FFABS( p0 - q0 ) < alpha &&
6165                 FFABS( p1 - p0 ) < beta &&
6166                 FFABS( q1 - q0 ) < beta ) {
6168                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6169                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6170             tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (vertical filtering across rows);
 * mirror of filter_mb_edgev using the v_loop_filter dsp functions. */
6176 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6177     const int index_a = qp + h->slice_alpha_c0_offset;
6178     const int alpha = (alpha_table+52)[index_a];
6179     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6183         tc[0] = (tc0_table+52)[index_a][bS[0]];
6184         tc[1] = (tc0_table+52)[index_a][bS[1]];
6185         tc[2] = (tc0_table+52)[index_a][bS[2]];
6186         tc[3] = (tc0_table+52)[index_a][bS[3]];
6187         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6189         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblock one horizontal chroma edge; chroma tc0 values biased by +1
 * as in filter_mb_edgecv. */
6193 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6194     const int index_a = qp + h->slice_alpha_c0_offset;
6195     const int alpha = (alpha_table+52)[index_a];
6196     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6200         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6201         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6202         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6203         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6204         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6206         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock: computes boundary strengths in
 * bulk (dsp helper or constant bS for intra) instead of the generic
 * per-edge logic. Falls back to filter_mb() for cases it cannot handle
 * (picture edges, per-slice qp differences, disable_deblocking_filter_idc 2). */
6210 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6211     MpegEncContext * const s = &h->s;
/* in a bottom-field picture the first row of MBs is row 1 */
6212     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6214     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6218     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6219 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6220        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6221                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6222         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6225     assert(!FRAME_MBAFF);
6227     mb_type = s->current_picture.mb_type[mb_xy];
/* average qp across each MB boundary (left: qp0, top: qp1) per the spec */
6228     qp = s->current_picture.qscale_table[mb_xy];
6229     qp0 = s->current_picture.qscale_table[mb_xy-1];
6230     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6231     qpc = get_chroma_qp( h, 0, qp );
6232     qpc0 = get_chroma_qp( h, 0, qp0 );
6233     qpc1 = get_chroma_qp( h, 0, qp1 );
6234     qp0 = (qp + qp0 + 1) >> 1;
6235     qp1 = (qp + qp1 + 1) >> 1;
6236     qpc0 = (qpc + qpc0 + 1) >> 1;
6237     qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this threshold alpha/beta are zero and the filter is a no-op */
6238     qp_thresh = 15 - h->slice_alpha_c0_offset;
6239     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6240        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: bS is constant (4 on MB edges, 3 inside / on top edge of field pics) */
6243     if( IS_INTRA(mb_type) ) {
6244         int16_t bS4[4] = {4,4,4,4};
6245         int16_t bS3[4] = {3,3,3,3};
6246         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every second internal edge is filtered */
6247         if( IS_8x8DCT(mb_type) ) {
6248             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6249             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6250             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6251             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6253             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6254             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6255             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6256             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6257             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6258             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6259             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6260             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6262         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6263         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6264         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6265         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6266         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6267         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6268         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6269         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge; bSv aliases bS as 4x16bit packed values */
6272         DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6273         uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6275         if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6277             bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge: internal edges whose bS is known 0 from the partition shape */
6279             int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6280                              (mb_type & MB_TYPE_16x8) ? 1 : 0;
6281             int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6282                              && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6284             int step = IS_8x8DCT(mb_type) ? 2 : 1;
6285             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6286             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6287                                               (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* MB edges against intra neighbours always get bS 4 (3 for field top edge) */
6289         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6290             bSv[0][0] = 0x0004000400040004ULL;
6291         if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6292             bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply one edge in direction dir (0=vertical, 1=horizontal), skipping
 * edges whose packed bS word is all zero */
6294 #define FILTER(hv,dir,edge)\
6295         if(bSv[dir][edge]) {\
6296             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6298                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6299                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6305         } else if( IS_8x8DCT(mb_type) ) {
6325 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6326 MpegEncContext * const s = &h->s;
6328 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6329 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6330 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6331 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6332 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6334 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6335 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6336 // how often to recheck mv-based bS when iterating between edges
6337 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6338 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6339 // how often to recheck mv-based bS when iterating along each edge
6340 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6342 if (first_vertical_edge_done) {
6346 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6349 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6350 && !IS_INTERLACED(mb_type)
6351 && IS_INTERLACED(mbm_type)
6353 // This is a special case in the norm where the filtering must
6354 // be done twice (one each of the field) even if we are in a
6355 // frame macroblock.
6357 static const int nnz_idx[4] = {4,5,6,3};
6358 unsigned int tmp_linesize = 2 * linesize;
6359 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6360 int mbn_xy = mb_xy - 2 * s->mb_stride;
6365 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6366 if( IS_INTRA(mb_type) ||
6367 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6368 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6370 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6371 for( i = 0; i < 4; i++ ) {
6372 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6373 mbn_nnz[nnz_idx[i]] != 0 )
6379 // Do not use s->qscale as luma quantizer because it has not the same
6380 // value in IPCM macroblocks.
6381 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6382 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6383 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6384 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6385 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6386 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6387 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6388 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6395 for( edge = start; edge < edges; edge++ ) {
6396 /* mbn_xy: neighbor macroblock */
6397 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6398 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6399 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6403 if( (edge&1) && IS_8x8DCT(mb_type) )
6406 if( IS_INTRA(mb_type) ||
6407 IS_INTRA(mbn_type) ) {
6410 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6411 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6420 bS[0] = bS[1] = bS[2] = bS[3] = value;
6425 if( edge & mask_edge ) {
6426 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6429 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6430 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6433 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6434 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6435 int bn_idx= b_idx - (dir ? 8:1);
6438 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6439 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6440 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6441 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6444 if(h->slice_type_nos == FF_B_TYPE && v){
6446 for( l = 0; !v && l < 2; l++ ) {
6448 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6449 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6450 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6454 bS[0] = bS[1] = bS[2] = bS[3] = v;
6460 for( i = 0; i < 4; i++ ) {
6461 int x = dir == 0 ? edge : i;
6462 int y = dir == 0 ? i : edge;
6463 int b_idx= 8 + 4 + x + 8*y;
6464 int bn_idx= b_idx - (dir ? 8:1);
6466 if( h->non_zero_count_cache[b_idx] |
6467 h->non_zero_count_cache[bn_idx] ) {
6473 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6474 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6475 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6476 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6482 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6484 for( l = 0; l < 2; l++ ) {
6486 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6487 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6488 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6497 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6502 // Do not use s->qscale as luma quantizer because it has not the same
6503 // value in IPCM macroblocks.
6504 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6505 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6506 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6507 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6509 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6510 if( (edge&1) == 0 ) {
6511 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6512 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6513 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6514 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6517 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6518 if( (edge&1) == 0 ) {
6519 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6520 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6521 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6522 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Full (non-fast-path) deblocking of one macroblock.
 * Handles the low-QP early exit, the CAVLC+8x8dct non-zero-count fixup,
 * the MBAFF special first vertical edge, then delegates vertical and
 * horizontal filtering to filter_mb_dir().
 *
 * NOTE(review): elided extract — some braces/branches are not visible here.
 */
6528 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6529 MpegEncContext * const s = &h->s;
6530 const int mb_xy= mb_x + mb_y*s->mb_stride;
6531 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs use a tighter vertical mv threshold (half luma sample) */
6532 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6533 int first_vertical_edge_done = 0;
6536 //for sufficiently low qp, filtering wouldn't do anything
6537 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6539 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6540 int qp = s->current_picture.qscale_table[mb_xy];
/* also check the averaged QP against the left and top neighbours */
6542 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6543 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6548 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6549 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6550 int top_type, left_type[2];
6551 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6552 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6553 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild neighbour nnz entries from the coded-block-pattern bits */
6555 if(IS_8x8DCT(top_type)){
6556 h->non_zero_count_cache[4+8*0]=
6557 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6558 h->non_zero_count_cache[6+8*0]=
6559 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6561 if(IS_8x8DCT(left_type[0])){
6562 h->non_zero_count_cache[3+8*1]=
6563 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6565 if(IS_8x8DCT(left_type[1])){
6566 h->non_zero_count_cache[3+8*3]=
6567 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* current MB: spread each 8x8 cbp bit over its four 4x4 cache slots */
6570 if(IS_8x8DCT(mb_type)){
6571 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6572 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6574 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6575 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6577 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6578 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6580 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6581 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6586 // left mb is in picture
6587 && h->slice_table[mb_xy-1] != 0xFFFF
6588 // and current and left pair do not have the same interlaced type
6589 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6590 // and left mb is in the same slice if deblocking_filter == 2
6591 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6592 /* First vertical edge is different in MBAFF frames
6593 * There are 8 different bS to compute and 2 different Qp
6595 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6596 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6601 int mb_qp, mbn0_qp, mbn1_qp;
6603 first_vertical_edge_done = 1;
6605 if( IS_INTRA(mb_type) )
6606 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6608 for( i = 0; i < 8; i++ ) {
/* pick which MB of the left pair faces each of the 8 edge segments */
6609 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6611 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6613 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6614 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6615 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6617 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average QPs (luma and both chroma planes) per left-pair neighbour */
6624 mb_qp = s->current_picture.qscale_table[mb_xy];
6625 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6626 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6627 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6628 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6629 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6630 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6631 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6632 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6633 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6634 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6635 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6636 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6639 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6640 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6641 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6642 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6643 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* filter both directions; a dir loop vs. two explicit calls — presumably
 * selected by an elided #if/else, TODO confirm against the full file */
6647 for( dir = 0; dir < 2; dir++ )
6648 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6650 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6651 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode all macroblocks of one slice (thread entry point).
 * Chooses between the CABAC and CAVLC macroblock loops based on
 * pps.cabac, reports decoded regions to the error concealment via
 * ff_er_add_slice(), and draws horizontal bands as rows complete.
 *
 * NOTE(review): elided extract — loop bodies and several braces are missing
 * from this view.
 */
6655 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6656 H264Context *h = *(void**)arg;
6657 MpegEncContext * const s = &h->s;
6658 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6662 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6663 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6665 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6669 align_get_bits( &s->gb );
6672 ff_init_cabac_states( &h->cabac);
6673 ff_init_cabac_decoder( &h->cabac,
6674 s->gb.buffer + get_bits_count(&s->gb)/8,
6675 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6676 /* calculate pre-state */
/* initialise all 460 CABAC context states from qscale and init tables */
6677 for( i= 0; i < 460; i++ ) {
6679 if( h->slice_type_nos == FF_I_TYPE )
6680 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6682 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6685 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6687 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* CABAC macroblock loop */
6692 int ret = decode_mb_cabac(h);
6694 //STOP_TIMER("decode_mb_cabac")
6696 if(ret>=0) hl_decode_mb(h);
6698 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6701 ret = decode_mb_cabac(h);
6703 if(ret>=0) hl_decode_mb(h);
6706 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond 2 bytes indicates corrupted input */
6708 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6709 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6710 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6714 if( ++s->mb_x >= s->mb_width ) {
6716 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6718 if(FIELD_OR_MBAFF_PICTURE) {
6723 if( eos || s->mb_y >= s->mb_height ) {
6724 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6725 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC macroblock loop */
6732 int ret = decode_mb_cavlc(h);
6734 if(ret>=0) hl_decode_mb(h);
6736 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6738 ret = decode_mb_cavlc(h);
6740 if(ret>=0) hl_decode_mb(h);
6745 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6751 if(++s->mb_x >= s->mb_width){
6753 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6755 if(FIELD_OR_MBAFF_PICTURE) {
6758 if(s->mb_y >= s->mb_height){
6759 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exactly consumed bitstream => clean slice end; otherwise report error */
6761 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6762 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6766 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6773 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6774 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6775 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6776 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6780 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* simple (non-H.264-specific) decode loop over the remaining MBs */
6789 for(;s->mb_y < s->mb_height; s->mb_y++){
6790 for(;s->mb_x < s->mb_width; s->mb_x++){
6791 int ret= decode_mb(h);
6796 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6797 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6802 if(++s->mb_x >= s->mb_width){
6804 if(++s->mb_y >= s->mb_height){
6805 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6810 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* NOTE(review): the next line is corrupted in this copy — "s->?gb" and
 * "s->gb?." look like mis-encoded "s->gb" / "s->gb." (compare line 6818
 * just below). Fix the stray '?' characters before compiling. */
6817 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6818 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6819 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6823 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6830 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6833 return -1; //not reached
/**
 * Parse a picture timing SEI message (H.264 Annex D) from the bitstream:
 * CPB/DPB delays (when HRD parameters are present), pic_struct, and the
 * optional per-timestamp clock fields, which are skipped except ct_type.
 */
6836 static int decode_picture_timing(H264Context *h){
6837 MpegEncContext * const s = &h->s;
6838 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6839 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6840 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6842 if(h->sps.pic_struct_present_flag){
6843 unsigned int i, num_clock_ts;
6844 h->sei_pic_struct = get_bits(&s->gb, 4);
/* values above frame tripling are reserved by the spec */
6847 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6850 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6852 for (i = 0 ; i < num_clock_ts ; i++){
6853 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6854 unsigned int full_timestamp_flag;
6855 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6856 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6857 skip_bits(&s->gb, 5); /* counting_type */
6858 full_timestamp_flag = get_bits(&s->gb, 1);
6859 skip_bits(&s->gb, 1); /* discontinuity_flag */
6860 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6861 skip_bits(&s->gb, 8); /* n_frames */
6862 if(full_timestamp_flag){
6863 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6864 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6865 skip_bits(&s->gb, 5); /* hours_value 0..23 */
/* without a full timestamp, each field is individually flagged */
6867 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6868 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6869 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6870 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6871 if(get_bits(&s->gb, 1)) /* hours_flag */
6872 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6876 if(h->sps.time_offset_length > 0)
6877 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user-data SEI payload of 'size' bytes.
 * Recognizes x264 version strings (after the 16-byte UUID) to record
 * h->x264_build for bug workarounds; remaining bytes are skipped.
 */
6884 static int decode_unregistered_user_data(H264Context *h, int size){
6885 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
6886 uint8_t user_data[16+256];
6892 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6893 user_data[i]= get_bits(&s->gb, 8);
6897 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6898 if(e==1 && build>=0)
6899 h->x264_build= build;
6901 if(s->avctx->debug & FF_DEBUG_BUGS)
6902 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip any payload bytes that did not fit in the local buffer */
6905 skip_bits(&s->gb, 8);
/**
 * Parse a recovery point SEI message: stores the recovery frame count and
 * skips the remaining single-bit/2-bit flags.
 */
6910 static int decode_recovery_point(H264Context *h){
6911 MpegEncContext * const s = &h->s;
6913 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6914 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/**
 * Parse a buffering period SEI message (H.264 Annex D / D.1.1):
 * reads initial CPB removal delays for each schedule-selection index of
 * the referenced SPS (NAL and/or VCL HRD), then marks the message present.
 */
6919 static int decode_buffering_period(H264Context *h){
6920 MpegEncContext * const s = &h->s;
6921 unsigned int sps_id;
6925 sps_id = get_ue_golomb_31(&s->gb);
6926 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6927 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6930 sps = h->sps_buffers[sps_id];
6932 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6933 if (sps->nal_hrd_parameters_present_flag) {
6934 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6935 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6936 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6939 if (sps->vcl_hrd_parameters_present_flag) {
6940 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6941 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6942 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6946 h->sei_buffering_period_present = 1;
/**
 * Parse all SEI messages in the current NAL unit.
 * Each message is a (type, size) pair using 0xFF-extended byte coding;
 * known payload types are dispatched, unknown ones are skipped.
 */
6950 int ff_h264_decode_sei(H264Context *h){
6951 MpegEncContext * const s = &h->s;
6953 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: 255-bytes accumulate, final byte < 255 terminates */
6958 type+= show_bits(&s->gb, 8);
6959 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same extended-byte coding */
6963 size+= show_bits(&s->gb, 8);
6964 }while(get_bits(&s->gb, 8) == 255);
6967 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6968 if(decode_picture_timing(h) < 0)
6971 case SEI_TYPE_USER_DATA_UNREGISTERED:
6972 if(decode_unregistered_user_data(h, size) < 0)
6975 case SEI_TYPE_RECOVERY_POINT:
6976 if(decode_recovery_point(h) < 0)
6979 case SEI_BUFFERING_PERIOD:
6980 if(decode_buffering_period(h) < 0)
/* unhandled payload types: skip the declared size */
6984 skip_bits(&s->gb, 8*size);
6987 //FIXME check bits here
6988 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters (H.264 Annex E)
 * into the SPS: per-CPB rate/size entries are read and discarded, the
 * delay field lengths and CPB count are stored.
 */
6994 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6995 MpegEncContext * const s = &h->s;
6997 cpb_count = get_ue_golomb_31(&s->gb) + 1;
/* cpb_cnt_minus1 is 0..31; anything larger is invalid */
6999 if(cpb_count > 32U){
7000 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7004 get_bits(&s->gb, 4); /* bit_rate_scale */
7005 get_bits(&s->gb, 4); /* cpb_size_scale */
7006 for(i=0; i<cpb_count; i++){
7007 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7008 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7009 get_bits1(&s->gb); /* cbr_flag */
7011 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7012 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7013 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7014 sps->time_offset_length = get_bits(&s->gb, 5);
7015 sps->cpb_cnt = cpb_count;
/**
 * Parse VUI (video usability information) parameters (H.264 Annex E)
 * into the SPS: sample aspect ratio, colour description, timing info,
 * HRD parameters and bitstream restrictions. Fields that FFmpeg does not
 * use are read and discarded to keep the bitstream position correct.
 */
7019 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7020 MpegEncContext * const s = &h->s;
7021 int aspect_ratio_info_present_flag;
7022 unsigned int aspect_ratio_idc;
7024 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7026 if( aspect_ratio_info_present_flag ) {
7027 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7028 if( aspect_ratio_idc == EXTENDED_SAR ) {
7029 sps->sar.num= get_bits(&s->gb, 16);
7030 sps->sar.den= get_bits(&s->gb, 16);
7031 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7032 sps->sar= pixel_aspect[aspect_ratio_idc];
7034 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7041 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7043 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7044 get_bits1(&s->gb); /* overscan_appropriate_flag */
7047 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7048 get_bits(&s->gb, 3); /* video_format */
7049 get_bits1(&s->gb); /* video_full_range_flag */
7050 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7051 get_bits(&s->gb, 8); /* colour_primaries */
7052 get_bits(&s->gb, 8); /* transfer_characteristics */
7053 get_bits(&s->gb, 8); /* matrix_coefficients */
7057 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7058 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7059 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7062 sps->timing_info_present_flag = get_bits1(&s->gb);
7063 if(sps->timing_info_present_flag){
7064 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7065 sps->time_scale = get_bits_long(&s->gb, 32);
7066 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7069 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7070 if(sps->nal_hrd_parameters_present_flag)
7071 if(decode_hrd_parameters(h, sps) < 0)
7073 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7074 if(sps->vcl_hrd_parameters_present_flag)
7075 if(decode_hrd_parameters(h, sps) < 0)
7077 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7078 get_bits1(&s->gb); /* low_delay_hrd_flag */
7079 sps->pic_struct_present_flag = get_bits1(&s->gb);
7081 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7082 if(sps->bitstream_restriction_flag){
7083 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7084 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7085 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7086 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7087 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7088 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7089 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* guard against absurd reorder depths from corrupted streams */
7091 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7092 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one quantization scaling list of 'size' (16 or 64) entries.
 * If the list is absent the prediction fallback is copied; if the first
 * delta yields 0 the JVT default list is used; otherwise entries are
 * delta-coded in zigzag order, repeating the last value when delta is 0.
 */
7100 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7101 const uint8_t *jvt_list, const uint8_t *fallback_list){
7102 MpegEncContext * const s = &h->s;
7103 int i, last = 8, next = 8;
7104 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7105 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7106 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7108 for(i=0;i<size;i++){
/* delta-code each coefficient relative to the previous one, mod 256 */
7110 next = (last + get_se_golomb(&s->gb)) & 0xff;
7111 if(!i && !next){ /* matrix not written, we use the preset one */
7112 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* delta of 0 (next unchanged) repeats the previous value */
7115 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback rules per the spec: a PPS falls back to the SPS matrices when
 * the SPS carried any; within a set, each list falls back to the
 * previously decoded list of the same intra/inter class.
 */
7119 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7120 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7121 MpegEncContext * const s = &h->s;
7122 int fallback_sps = !is_sps && sps->scaling_matrix_present;
/* fallbacks for the first list of each class: SPS matrices or JVT defaults */
7123 const uint8_t *fallback[4] = {
7124 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7125 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7126 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7127 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7129 if(get_bits1(&s->gb)){
7130 sps->scaling_matrix_present |= is_sps;
7131 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7132 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7133 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7134 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7135 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7136 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS or when the PPS enables the 8x8 transform */
7137 if(is_sps || pps->transform_8x8_mode){
7138 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7139 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set (SPS) NAL unit and store it in
 * h->sps_buffers[sps_id], replacing any previous SPS with the same id.
 * Validates sps_id, POC configuration, reference frame count and picture
 * dimensions before accepting the SPS.
 *
 * NOTE(review): elided extract — some error-return paths and braces are
 * not visible here.
 */
7144 int ff_h264_decode_seq_parameter_set(H264Context *h){
7145 MpegEncContext * const s = &h->s;
7146 int profile_idc, level_idc;
7147 unsigned int sps_id;
7151 profile_idc= get_bits(&s->gb, 8);
7152 get_bits1(&s->gb); //constraint_set0_flag
7153 get_bits1(&s->gb); //constraint_set1_flag
7154 get_bits1(&s->gb); //constraint_set2_flag
7155 get_bits1(&s->gb); //constraint_set3_flag
7156 get_bits(&s->gb, 4); // reserved
7157 level_idc= get_bits(&s->gb, 8);
7158 sps_id= get_ue_golomb_31(&s->gb);
7160 if(sps_id >= MAX_SPS_COUNT) {
7161 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7164 sps= av_mallocz(sizeof(SPS));
7168 sps->profile_idc= profile_idc;
7169 sps->level_idc= level_idc;
/* default: flat 16 scaling matrices until/unless the SPS overrides them */
7171 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7172 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7173 sps->scaling_matrix_present = 0;
7175 if(sps->profile_idc >= 100){ //high profile
7176 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7177 if(sps->chroma_format_idc == 3)
7178 sps->residual_color_transform_flag = get_bits1(&s->gb);
7179 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7180 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7181 sps->transform_bypass = get_bits1(&s->gb);
7182 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles are always 4:2:0 */
7184 sps->chroma_format_idc= 1;
7187 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7188 sps->poc_type= get_ue_golomb_31(&s->gb);
7190 if(sps->poc_type == 0){ //FIXME #define
7191 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7192 } else if(sps->poc_type == 1){//FIXME #define
7193 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7194 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7195 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7196 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7198 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7199 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7203 for(i=0; i<sps->poc_cycle_length; i++)
7204 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7205 }else if(sps->poc_type != 2){
7206 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7210 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7211 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7212 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7215 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7216 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7217 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7218 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7219 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7220 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7224 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7225 if(!sps->frame_mbs_only_flag)
7226 sps->mb_aff= get_bits1(&s->gb);
7230 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7232 #ifndef ALLOW_INTERLACE
7234 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7236 sps->crop= get_bits1(&s->gb);
7238 sps->crop_left = get_ue_golomb(&s->gb);
7239 sps->crop_right = get_ue_golomb(&s->gb);
7240 sps->crop_top = get_ue_golomb(&s->gb);
7241 sps->crop_bottom= get_ue_golomb(&s->gb);
7242 if(sps->crop_left || sps->crop_top){
7243 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7245 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7246 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7252 sps->crop_bottom= 0;
7255 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7256 if( sps->vui_parameters_present_flag )
7257 decode_vui_parameters(h, sps);
7259 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7260 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7261 sps_id, sps->profile_idc, sps->level_idc,
7263 sps->ref_frame_count,
7264 sps->mb_width, sps->mb_height,
7265 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7266 sps->direct_8x8_inference_flag ? "8B8" : "",
7267 sps->crop_left, sps->crop_right,
7268 sps->crop_top, sps->crop_bottom,
7269 sps->vui_parameters_present_flag ? "VUI" : "",
7270 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7271 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7272 sps->timing_info_present_flag ? sps->time_scale : 0
/* replace any previously stored SPS with the same id */
7276 av_free(h->sps_buffers[sps_id]);
7277 h->sps_buffers[sps_id]= sps;
/**
 * Fill one row of the PPS chroma QP lookup table.
 * For each luma QP value 0..51, applies the chroma QP index offset,
 * clips the result to the valid [0,51] range and maps it through the
 * global chroma_qp[] table.
 *
 * @param pps   PPS whose chroma_qp_table row is filled
 * @param t     row index (0 = Cb offset, 1 = Cr offset, per caller usage)
 * @param index chroma_qp_index_offset to add to each luma QP
 */
7286 build_qp_table(PPS *pps, int t, int index)
7289 for(i = 0; i < 52; i++)
7290 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit from the bitstream
 * reader s->gb and store it in h->pps_buffers[pps_id].
 *
 * @param h          H264 decoder context; s->gb must be positioned at the PPS RBSP
 * @param bit_length length in bits of the PPS RBSP, used to detect the
 *                   optional trailing (High profile) syntax elements
 */
7293 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7294 MpegEncContext * const s = &h->s;
7295 unsigned int pps_id= get_ue_golomb(&s->gb);
/* reject out-of-range PPS ids before touching h->pps_buffers[] */
7298 if(pps_id >= MAX_PPS_COUNT) {
7299 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
/* the new PPS is built in a zeroed heap allocation and only installed at the end */
7303 pps= av_mallocz(sizeof(PPS));
7306 pps->sps_id= get_ue_golomb_31(&s->gb);
/* the referenced SPS must already have been decoded */
7307 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7308 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7312 pps->cabac= get_bits1(&s->gb);
7313 pps->pic_order_present= get_bits1(&s->gb);
7314 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* slice_group_count > 1 means FMO (flexible macroblock ordering), which
 * this decoder does not implement; the map type is parsed but the spec
 * table below is only kept as reference documentation */
7315 if(pps->slice_group_count > 1 ){
7316 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7317 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7318 switch(pps->mb_slice_group_map_type){
7321 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7322 | run_length[ i ] |1 |ue(v) |
7327 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7329 | top_left_mb[ i ] |1 |ue(v) |
7330 | bottom_right_mb[ i ] |1 |ue(v) |
7338 | slice_group_change_direction_flag |1 |u(1) |
7339 | slice_group_change_rate_minus1 |1 |ue(v) |
7344 | slice_group_id_cnt_minus1 |1 |ue(v) |
7345 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7347 | slice_group_id[ i ] |1 |u(v) |
/* default reference list sizes (num_ref_idx_l{0,1}_active_minus1 + 1) */
7352 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7353 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7354 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7355 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7359 pps->weighted_pred= get_bits1(&s->gb);
7360 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7361 pps->init_qp= get_se_golomb(&s->gb) + 26;
7362 pps->init_qs= get_se_golomb(&s->gb) + 26;
7363 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7364 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7365 pps->constrained_intra_pred= get_bits1(&s->gb);
7366 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7368 pps->transform_8x8_mode= 0;
7369 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* start from the scaling matrices of the referenced SPS; the PPS may override them below */
7370 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7371 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* if bits remain, the optional High-profile PPS extension
 * (transform_8x8_mode_flag, scaling matrices, second chroma QP offset) is present */
7373 if(get_bits_count(&s->gb) < bit_length){
7374 pps->transform_8x8_mode= get_bits1(&s->gb);
7375 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7376 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
/* no extension: Cr uses the same offset as Cb */
7378 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* precompute the per-plane luma-QP -> chroma-QP lookup tables */
7381 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7382 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7383 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7384 h->pps.chroma_qp_diff= 1;
7386 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7387 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7388 pps_id, pps->sps_id,
7389 pps->cabac ? "CABAC" : "CAVLC",
7390 pps->slice_group_count,
7391 pps->ref_count[0], pps->ref_count[1],
7392 pps->weighted_pred ? "weighted" : "",
7393 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7394 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7395 pps->constrained_intra_pred ? "CONSTR" : "",
7396 pps->redundant_pic_cnt_present ? "REDU" : "",
7397 pps->transform_8x8_mode ? "8x8DCT" : ""
/* replace any previously stored PPS with the same id */
7401 av_free(h->pps_buffers[pps_id]);
7402 h->pps_buffers[pps_id]= pps;
7410 * Call decode_slice() for each context.
7412 * @param h h264 master context
7413 * @param context_count number of contexts to execute
7415 static void execute_decode_slices(H264Context *h, int context_count){
7416 MpegEncContext * const s = &h->s;
7417 AVCodecContext * const avctx= s->avctx;
/* hwaccel / VDPAU streams are decoded by the acceleration backend,
 * not by the software slice decoder (early-out bodies not shown here) */
7421 if (s->avctx->hwaccel)
7423 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* single context: decode synchronously on the master context */
7425 if(context_count == 1) {
7426 decode_slice(avctx, &h);
/* multiple contexts: seed each worker with the master's error settings */
7428 for(i = 1; i < context_count; i++) {
7429 hx = h->thread_context[i];
7430 hx->s.error_recognition = avctx->error_recognition;
7431 hx->s.error_count = 0;
/* dispatch decode_slice() over all contexts via the user execute() callback */
7434 avctx->execute(avctx, (void *)decode_slice,
7435 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7437 /* pull back stuff from slices to master context */
7438 hx = h->thread_context[context_count - 1];
7439 s->mb_x = hx->s.mb_x;
7440 s->mb_y = hx->s.mb_y;
7441 s->dropable = hx->s.dropable;
7442 s->picture_structure = hx->s.picture_structure;
/* fold per-worker error counts back into the master context */
7443 for(i = 1; i < context_count; i++)
7444 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split a buffer into NAL units and decode each of them.
 * Handles both length-prefixed AVC ("avcC"-style, h->is_avc) and
 * Annex-B start-code framing, dispatches on the NAL unit type
 * (slices, data partitions, SEI, SPS, PPS, ...) and batches slices
 * across thread contexts, flushing them through execute_decode_slices().
 *
 * @param h        master decoder context
 * @param buf      input bitstream buffer
 * @param buf_size size of buf in bytes
 */
7449 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7450 MpegEncContext * const s = &h->s;
7451 AVCodecContext * const avctx= s->avctx;
7453 H264Context *hx; ///< thread context
7454 int context_count = 0;
7456 h->max_contexts = avctx->thread_count;
/* debug hex dump of the buffer head (condition lines not visible here) */
7459 for(i=0; i<50; i++){
7460 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* unless the caller feeds partial frames (CHUNKS), start a fresh picture */
7463 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7464 h->current_slice = 0;
7465 if (!s->first_field)
7466 s->current_picture_ptr= NULL;
7479 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix (nal_length_size bytes) */
7481 for(i = 0; i < h->nal_length_size; i++)
7482 nalsize = (nalsize << 8) | buf[buf_index++];
7483 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7488 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7493 // start code prefix search
7494 for(; buf_index + 3 < buf_size; buf_index++){
7495 // This should always succeed in the first iteration.
7496 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7500 if(buf_index+3 >= buf_size) break;
/* unescape the NAL (remove emulation-prevention bytes) into ptr */
7505 hx = h->thread_context[context_count];
7507 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7508 if (ptr==NULL || dst_length < 0){
/* strip trailing zero bytes, then subtract the rbsp_stop_one_bit padding
 * to get the exact payload length in bits */
7511 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7513 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7515 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7516 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* AVC: warn when the parser consumed fewer bytes than the declared NAL
 * size; only an error if the unconsumed tail contains non-zero bytes */
7519 if (h->is_avc && (nalsize != consumed)){
7520 int i, debug_level = AV_LOG_DEBUG;
7521 for (i = consumed; i < nalsize; i++)
7522 if (buf[buf_index+i])
7523 debug_level = AV_LOG_ERROR;
7524 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7528 buf_index += consumed;
/* skip non-reference NALs entirely when hurry_up / skip_frame asks for it */
7530 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7531 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7536 switch(hx->nal_unit_type){
/* IDR slice: reject a mix of IDR and non-IDR in one access unit, then flush references */
7538 if (h->nal_unit_type != NAL_IDR_SLICE) {
7539 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7542 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* regular slice: whole slice data in one NAL, no data partitioning */
7544 init_get_bits(&hx->s.gb, ptr, bit_length);
7546 hx->inter_gb_ptr= &hx->s.gb;
7547 hx->s.data_partitioning = 0;
7549 if((err = decode_slice_header(hx, h)))
7552 if (s->avctx->hwaccel && h->current_slice == 1) {
7553 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
/* a recovery-point SEI also makes the picture a usable sync point */
7557 s->current_picture_ptr->key_frame |=
7558 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7559 (h->sei_recovery_frame_cnt >= 0);
/* decode the slice unless skip_frame / hurry_up says to drop it */
7560 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7561 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7562 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7563 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7564 && avctx->skip_frame < AVDISCARD_ALL){
7565 if(avctx->hwaccel) {
7566 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
/* VDPAU path: forward the raw NAL (re-prefixed with a start code) to the hardware */
7569 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7570 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7571 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7572 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* data partition A: carries the slice header; partitions B/C follow separately */
7578 init_get_bits(&hx->s.gb, ptr, bit_length);
7580 hx->inter_gb_ptr= NULL;
7581 hx->s.data_partitioning = 1;
7583 err = decode_slice_header(hx, h);
/* data partition B: intra residual data */
7586 init_get_bits(&hx->intra_gb, ptr, bit_length);
7587 hx->intra_gb_ptr= &hx->intra_gb;
/* data partition C: inter residual data */
7590 init_get_bits(&hx->inter_gb, ptr, bit_length);
7591 hx->inter_gb_ptr= &hx->inter_gb;
/* once all partitions for a slice are in, decode it (subject to skip settings) */
7593 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7594 && s->context_initialized
7596 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7597 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7598 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7599 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message */
7603 init_get_bits(&s->gb, ptr, bit_length);
7604 ff_h264_decode_sei(h);
/* sequence parameter set */
7607 init_get_bits(&s->gb, ptr, bit_length);
7608 ff_h264_decode_seq_parameter_set(h);
7610 if(s->flags& CODEC_FLAG_LOW_DELAY)
7613 if(avctx->has_b_frames < 2)
7614 avctx->has_b_frames= !s->low_delay;
/* picture parameter set */
7617 init_get_bits(&s->gb, ptr, bit_length);
7619 ff_h264_decode_picture_parameter_set(h, bit_length);
/* NAL types that need no processing here */
7623 case NAL_END_SEQUENCE:
7624 case NAL_END_STREAM:
7625 case NAL_FILLER_DATA:
7627 case NAL_AUXILIARY_SLICE:
7630 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* a full batch of slice contexts is ready: run them */
7633 if(context_count == h->max_contexts) {
7634 execute_decode_slices(h, context_count);
7639 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7641 /* Slice could not be decoded in parallel mode, copy down
7642 * NAL unit stuff to context 0 and restart. Note that
7643 * rbsp_buffer is not transferred, but since we no longer
7644 * run in parallel mode this should not be an issue. */
7645 h->nal_unit_type = hx->nal_unit_type;
7646 h->nal_ref_idc = hx->nal_ref_idc;
/* flush any slices still pending in the batch */
7652 execute_decode_slices(h, context_count);
7657 * returns the number of bytes consumed for building the current frame
7659 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7660 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7661 if(pos+10>buf_size) pos=buf_size; // oops ;)  -- within 10 bytes of the end: report the whole buffer as consumed
/**
 * Decoder entry point: decode one packet and possibly output one frame.
 * Handles end-of-stream flushing of delayed pictures, one-time avcC
 * extradata parsing, NAL decoding, and reordering of decoded pictures
 * into display order before returning one via *pict.
 *
 * @param avctx     codec context
 * @param data      output AVFrame (written when *data_size is set)
 * @param data_size set to sizeof(AVFrame) when a frame is returned, else 0
 */
7666 static int decode_frame(AVCodecContext *avctx,
7667 void *data, int *data_size,
7670 const uint8_t *buf = avpkt->data;
7671 int buf_size = avpkt->size;
7672 H264Context *h = avctx->priv_data;
7673 MpegEncContext *s = &h->s;
7674 AVFrame *pict = data;
7677 s->flags= avctx->flags;
7678 s->flags2= avctx->flags2;
7680 /* end of stream, output what is still in the buffers */
7681 if (buf_size == 0) {
7685 //FIXME factorize this with the output code below
/* pick the delayed picture with the smallest POC, stopping at a key frame / zero-POC boundary */
7686 out = h->delayed_pic[0];
7688 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7689 if(h->delayed_pic[i]->poc < out->poc){
7690 out = h->delayed_pic[i];
/* remove the chosen picture from the delayed list */
7694 for(i=out_idx; h->delayed_pic[i]; i++)
7695 h->delayed_pic[i] = h->delayed_pic[i+1];
7698 *data_size = sizeof(AVFrame);
7699 *pict= *(AVFrame*)out;
/* one-time parse of AVC "avcC" extradata: SPS/PPS records and nal_length_size */
7705 if(h->is_avc && !h->got_avcC) {
7706 int i, cnt, nalsize;
7707 unsigned char *p = avctx->extradata;
7708 if(avctx->extradata_size < 7) {
7709 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7713 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7716 /* sps and pps in the avcC always have length coded with 2 bytes,
7717 so put a fake nal_length_size = 2 while parsing them */
7718 h->nal_length_size = 2;
7719 // Decode sps from avcC
7720 cnt = *(p+5) & 0x1f; // Number of sps
7722 for (i = 0; i < cnt; i++) {
7723 nalsize = AV_RB16(p) + 2;
7724 if(decode_nal_units(h, p, nalsize) < 0) {
7725 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7730 // Decode pps from avcC
7731 cnt = *(p++); // Number of pps
7732 for (i = 0; i < cnt; i++) {
7733 nalsize = AV_RB16(p) + 2;
7734 if(decode_nal_units(h, p, nalsize) != nalsize) {
7735 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7740 // Now store right nal length size, that will be use to parse all other nals
7741 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7742 // Do not reparse avcC
/* Annex-B extradata (non-avcC): decode it once as ordinary NAL units */
7746 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7747 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7752 buf_index=decode_nal_units(h, buf, buf_size);
/* no picture was started and the caller expects complete frames -> error (or silent skip) */
7756 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7757 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7758 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* the frame is complete (or CHUNKS mode reached the last row): finish and output */
7762 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7763 Picture *out = s->current_picture_ptr;
7764 Picture *cur = s->current_picture_ptr;
7765 int i, pics, cross_idr, out_of_order, out_idx;
7769 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7770 /* Wait for second field. */
7774 cur->repeat_pict = 0;
7776 /* Signal interlacing information externally. */
7777 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7779 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7781 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7783 if(h->sps.pic_struct_present_flag){
7784 switch (h->sei_pic_struct)
7786 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7787 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7788 // Signal the possibility of telecined film externally (pic_struct 5,6)
7789 // From these hints, let the applications decide if they apply deinterlacing.
7790 cur->repeat_pict = 1;
7792 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7793 // Force progressive here, as doubling interlaced frame is a bad idea.
7794 cur->interlaced_frame = 0;
7795 cur->repeat_pict = 2;
7797 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7798 cur->interlaced_frame = 0;
7799 cur->repeat_pict = 4;
7803 /* Derive interlacing flag from used decoding process. */
7804 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7807 if (cur->field_poc[0] != cur->field_poc[1]){
7808 /* Derive top_field_first from field pocs. */
7809 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7811 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7812 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7813 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7814 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7815 cur->top_field_first = 1;
7817 cur->top_field_first = 0;
7819 /* Most likely progressive */
7820 cur->top_field_first = 0;
7824 //FIXME do something with unavailable reference frames
7826 /* Sort B-frames into display order */
7828 if(h->sps.bitstream_restriction_flag
7829 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7830 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* strict compliance without bitstream restrictions: assume maximum reorder delay */
7834 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7835 && !h->sps.bitstream_restriction_flag){
7836 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* append the current picture to the delayed (reorder) queue */
7841 while(h->delayed_pic[pics]) pics++;
7843 assert(pics <= MAX_DELAYED_PIC_COUNT);
7845 h->delayed_pic[pics++] = cur;
/* keep the buffer alive while queued, even if it is not a reference picture */
7846 if(cur->reference == 0)
7847 cur->reference = DELAYED_PIC_REF;
/* select the queued picture with the lowest POC as the output candidate */
7849 out = h->delayed_pic[0];
7851 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7852 if(h->delayed_pic[i]->poc < out->poc){
7853 out = h->delayed_pic[i];
7856 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7858 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* grow has_b_frames when the observed reordering exceeds the current delay */
7860 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7862 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7864 ((!cross_idr && out->poc > h->outputed_poc + 2)
7865 || cur->pict_type == FF_B_TYPE)))
7868 s->avctx->has_b_frames++;
/* pop the candidate from the queue when the delay budget is exceeded */
7871 if(out_of_order || pics > s->avctx->has_b_frames){
7872 out->reference &= ~DELAYED_PIC_REF;
7873 for(i=out_idx; h->delayed_pic[i]; i++)
7874 h->delayed_pic[i] = h->delayed_pic[i+1];
/* only emit it if it is actually in order */
7876 if(!out_of_order && pics > s->avctx->has_b_frames){
7877 *data_size = sizeof(AVFrame);
7879 h->outputed_poc = out->poc;
7880 *pict= *(AVFrame*)out;
7882 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7887 assert(pict->data[0] || !*data_size);
7888 ff_print_debug_info(s, pict);
7889 //printf("out %d\n", (int)pict->data[0]);
7892 /* Return the Picture timestamp as the frame number */
7893 /* we subtract 1 because it is added on utils.c */
7894 avctx->frame_number = s->picture_number - 1;
7896 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbors: a neighbor counts as available only if it belongs to the
 * same slice (same h->slice_num in h->slice_table).
 * Index meaning per the assignments below: 0 = top-left, 1 = top,
 * 2 = top-right, 3 = left; 4 and 5 are hard-coded placeholders.
 * NOTE(review): the branch guarding the top-row accesses (s->mb_y == 0
 * case) is not visible in this view — confirm against the full source.
 */
7899 static inline void fill_mb_avail(H264Context *h){
7900 MpegEncContext * const s = &h->s;
7901 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7904 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7905 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7906 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7912 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7913 h->mb_avail[4]= 1; //FIXME move out
7914 h->mb_avail[5]= 0; //FIXME move out
7922 #define SIZE (COUNT*40)
7928 // int int_temp[10000];
7930 AVCodecContext avctx;
7932 dsputil_init(&dsp, &avctx);
7934 init_put_bits(&pb, temp, SIZE);
7935 printf("testing unsigned exp golomb\n");
7936 for(i=0; i<COUNT; i++){
7938 set_ue_golomb(&pb, i);
7939 STOP_TIMER("set_ue_golomb");
7941 flush_put_bits(&pb);
7943 init_get_bits(&gb, temp, 8*SIZE);
7944 for(i=0; i<COUNT; i++){
7947 s= show_bits(&gb, 24);
7950 j= get_ue_golomb(&gb);
7952 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7955 STOP_TIMER("get_ue_golomb");
7959 init_put_bits(&pb, temp, SIZE);
7960 printf("testing signed exp golomb\n");
7961 for(i=0; i<COUNT; i++){
7963 set_se_golomb(&pb, i - COUNT/2);
7964 STOP_TIMER("set_se_golomb");
7966 flush_put_bits(&pb);
7968 init_get_bits(&gb, temp, 8*SIZE);
7969 for(i=0; i<COUNT; i++){
7972 s= show_bits(&gb, 24);
7975 j= get_se_golomb(&gb);
7976 if(j != i - COUNT/2){
7977 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7980 STOP_TIMER("get_se_golomb");
7984 printf("testing 4x4 (I)DCT\n");
7987 uint8_t src[16], ref[16];
7988 uint64_t error= 0, max_error=0;
7990 for(i=0; i<COUNT; i++){
7992 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7993 for(j=0; j<16; j++){
7994 ref[j]= random()%255;
7995 src[j]= random()%255;
7998 h264_diff_dct_c(block, src, ref, 4);
8001 for(j=0; j<16; j++){
8002 // printf("%d ", block[j]);
8003 block[j]= block[j]*4;
8004 if(j&1) block[j]= (block[j]*4 + 2)/5;
8005 if(j&4) block[j]= (block[j]*4 + 2)/5;
8009 s->dsp.h264_idct_add(ref, block, 4);
8010 /* for(j=0; j<16; j++){
8011 printf("%d ", ref[j]);
8015 for(j=0; j<16; j++){
8016 int diff= FFABS(src[j] - ref[j]);
8019 max_error= FFMAX(max_error, diff);
8022 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8023 printf("testing quantizer\n");
8024 for(qp=0; qp<52; qp++){
8026 src1_block[i]= src2_block[i]= random()%255;
8029 printf("Testing NAL layer\n");
8031 uint8_t bitstream[COUNT];
8032 uint8_t nal[COUNT*2];
8034 memset(&h, 0, sizeof(H264Context));
8036 for(i=0; i<COUNT; i++){
8044 for(j=0; j<COUNT; j++){
8045 bitstream[j]= (random() % 255) + 1;
8048 for(j=0; j<zeros; j++){
8049 int pos= random() % COUNT;
8050 while(bitstream[pos] == 0){
8059 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8061 printf("encoding failed\n");
8065 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8069 if(out_length != COUNT){
8070 printf("incorrect length %d %d\n", out_length, COUNT);
8074 if(consumed != nal_length){
8075 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8079 if(memcmp(bitstream, out, COUNT)){
8080 printf("mismatch\n");
8086 printf("Testing RBSP\n");
/**
 * Free all allocations owned by the H264 context: the two NAL rbsp
 * unescape buffers, the per-frame decoding tables, and every stored
 * SPS/PPS. Does not free the context structure itself.
 */
8094 av_cold void ff_h264_free_context(H264Context *h)
8098 av_freep(&h->rbsp_buffer[0]);
8099 av_freep(&h->rbsp_buffer[1]);
8100 free_tables(h); //FIXME cleanup init stuff perhaps
8102 for(i = 0; i < MAX_SPS_COUNT; i++)
8103 av_freep(h->sps_buffers + i);
8105 for(i = 0; i < MAX_PPS_COUNT; i++)
8106 av_freep(h->pps_buffers + i);
/**
 * AVCodec close callback: release all H264 context resources.
 */
8109 static av_cold int decode_end(AVCodecContext *avctx)
8111 H264Context *h = avctx->priv_data;
8112 MpegEncContext *s = &h->s;
8114 ff_h264_free_context(h);
8118 // memset(h, 0, sizeof(H264Context));
/* Registration entry for the software H.264 decoder.
 * CODEC_CAP_DELAY is set because pictures are buffered and reordered
 * into display order before being output. */
8124 AVCodec h264_decoder = {
8128 sizeof(H264Context),
8133 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8135 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8136 .pix_fmts= ff_hwaccel_pixfmt_list_420,
/* Registration entry for the VDPAU-accelerated H.264 decoder,
 * compiled in only when CONFIG_H264_VDPAU_DECODER is enabled. */
#if CONFIG_H264_VDPAU_DECODER
8140 AVCodec h264_vdpau_decoder = {
8144 sizeof(H264Context),
8149 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8151 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8155 #if CONFIG_SVQ3_DECODER