2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
85 return (a&0xFFFF) + (b<<16);
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options[4][8]={
104 #define LEVEL_TAB_BITS 8
105 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock neighbour caches from the frame-wide tables:
 * intra 4x4 prediction modes, non-zero coefficient counts, CBP, motion
 * vectors / reference indices, motion vector deltas (mvd) and B-direct
 * flags, taking interlacing / MBAFF neighbour selection into account.
 *
 * @param mb_type     macroblock type of the current MB
 * @param for_deblock nonzero when the caches are being filled for the
 *                    loop filter (selects deblocking-specific paths)
 *
 * NOTE(review): this chunk is a sampled excerpt of the original file;
 * several control-flow lines (braces, else branches, loop headers) are
 * not visible here, and each line retains the original file's line
 * number. Code below is preserved exactly as found.
 */
107 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
108 MpegEncContext * const s = &h->s;
109 const int mb_xy= h->mb_xy;
110 int topleft_xy, top_xy, topright_xy, left_xy[2];
111 int topleft_type, top_type, topright_type, left_type[2];
112 const int * left_block;
113 int topleft_partition= -1;
116 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
118 //FIXME deblocking could skip the intra and nnz parts.
119 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
122 /* Wow, what a mess, why didn't they simplify the interlacing & intra
123 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses: top row is one mb_stride up,
 * left neighbours are the previous MB. */
125 topleft_xy = top_xy - 1;
126 topright_xy= top_xy + 1;
127 left_xy[1] = left_xy[0] = mb_xy-1;
128 left_block = left_block_options[0];
/* NOTE(review): the section below appears to be the MBAFF neighbour
 * re-selection path (its enclosing condition is not visible in this
 * excerpt). pair_xy addresses the top MB of the current field pair. */
130 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
131 const int top_pair_xy = pair_xy - s->mb_stride;
132 const int topleft_pair_xy = top_pair_xy - 1;
133 const int topright_pair_xy = top_pair_xy + 1;
134 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
135 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
136 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
137 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
138 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
139 const int bottom = (s->mb_y & 1);
140 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
/* Adjust each neighbour address by one MB row depending on the field
 * parity of the current MB versus that neighbour pair. */
142 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
143 top_xy -= s->mb_stride;
145 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
146 topleft_xy -= s->mb_stride;
147 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
148 topleft_xy += s->mb_stride;
149 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
150 topleft_partition = 0;
152 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
153 topright_xy -= s->mb_stride;
155 if (left_mb_field_flag != curr_mb_field_flag) {
156 left_xy[1] = left_xy[0] = pair_xy - 1;
157 if (curr_mb_field_flag) {
158 left_xy[1] += s->mb_stride;
159 left_block = left_block_options[3];
161 left_block= left_block_options[2 - bottom];
166 h->top_mb_xy = top_xy;
167 h->left_mb_xy[0] = left_xy[0];
168 h->left_mb_xy[1] = left_xy[1];
/* Neighbour types: a slice_table value of 0xFFFF marks "no slice"
 * (unavailable); such neighbours are treated as type 0. */
172 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
173 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
174 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter MBs: undo the ref_cache remapping done for MC before the
 * loop filter reads the values (see FIXME below). */
176 if(MB_MBAFF && !IS_INTRA(mb_type)){
178 for(list=0; list<h->list_count; list++){
179 //These values where changed for ease of performing MC, we need to change them back
180 //FIXME maybe we can make MC and loop filter use the same values or prevent
181 //the MC code from changing ref_cache and rather use a temporary array.
182 if(USES_LIST(mb_type,list)){
183 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
184 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
185 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
187 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Decode path: a neighbour only counts if it is in the same slice. */
193 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
194 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
195 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
196 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
197 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra MBs: build the *_samples_available bitmasks used by the intra
 * predictors. With constrained_intra_pred only intra neighbours count
 * (type_mask keeps just the INTRA bits); otherwise any neighbour does. */
199 if(IS_INTRA(mb_type)){
200 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
201 h->topleft_samples_available=
202 h->top_samples_available=
203 h->left_samples_available= 0xFFFF;
204 h->topright_samples_available= 0xEEEA;
206 if(!(top_type & type_mask)){
207 h->topleft_samples_available= 0xB3FF;
208 h->top_samples_available= 0x33FF;
209 h->topright_samples_available= 0x26EA;
211 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
212 if(IS_INTERLACED(mb_type)){
213 if(!(left_type[0] & type_mask)){
214 h->topleft_samples_available&= 0xDFFF;
215 h->left_samples_available&= 0x5FFF;
217 if(!(left_type[1] & type_mask)){
218 h->topleft_samples_available&= 0xFF5F;
219 h->left_samples_available&= 0xFF5F;
222 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
223 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
224 assert(left_xy[0] == left_xy[1]);
225 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
226 h->topleft_samples_available&= 0xDF5F;
227 h->left_samples_available&= 0x5F5F;
231 if(!(left_type[0] & type_mask)){
232 h->topleft_samples_available&= 0xDF5F;
233 h->left_samples_available&= 0x5F5F;
237 if(!(topleft_type & type_mask))
238 h->topleft_samples_available&= 0x7FFF;
240 if(!(topright_type & type_mask))
241 h->topright_samples_available&= 0xFBFF;
/* Cache the neighbours' 4x4 intra prediction modes (bottom row of the
 * top MB, right column of the left MB); fall back to 'pred' when the
 * neighbour is not intra4x4 / not usable. */
243 if(IS_INTRA4x4(mb_type)){
244 if(IS_INTRA4x4(top_type)){
245 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
246 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
247 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
248 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
251 if(!(top_type & type_mask))
256 h->intra4x4_pred_mode_cache[4+8*0]=
257 h->intra4x4_pred_mode_cache[5+8*0]=
258 h->intra4x4_pred_mode_cache[6+8*0]=
259 h->intra4x4_pred_mode_cache[7+8*0]= pred;
262 if(IS_INTRA4x4(left_type[i])){
263 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
264 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
267 if(!(left_type[i] & type_mask))
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Cache non-zero coefficient counts of the top / left neighbours;
 * unavailable neighbours get 0 (CABAC inter) or 64 otherwise. */
289 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
303 h->non_zero_count_cache[4+8*0]=
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
311 h->non_zero_count_cache[1+8*3]=
312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
316 for (i=0; i<2; i++) {
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern (CBP) values. */
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter / direct MBs: fill mv_cache and ref_cache with the neighbours'
 * motion vectors and reference indices for each active reference list. */
356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
358 for(list=0; list<h->list_count; list++){
359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
367 h->mv_cache_clean[list]= 0;
/* Top neighbour: bottom row of its 4x4 MVs and bottom pair of 8x8 refs. */
369 if(USES_LIST(top_type, list)){
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left neighbours (two halves for MBAFF): right column of their MVs. */
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
405 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
/* Topleft / topright neighbours: single corner MV + ref each.
 * topleft_partition selects the mid-MB partition in the MBAFF case. */
408 if(USES_LIST(topleft_type, list)){
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
418 if(USES_LIST(topright_type, list)){
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark off-edge cache entries as not available and zero their MVs. */
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
434 h->ref_cache[list][scan8[4 ]] =
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
443 /* XXX beurk, Load mvd */
444 if(USES_LIST(top_type, list)){
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
456 if(USES_LIST(left_type[0], list)){
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
464 if(USES_LIST(left_type[1], list)){
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the neighbours' direct-mode flags (per 8x8 block). */
478 if(h->slice_type_nos == FF_B_TYPE){
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
498 if(IS_DIRECT(left_type[1]))
499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MAP_F2F application list: one invocation per cached neighbour entry.
 * The two macro definitions below convert frame<->field MVs/refs when a
 * neighbour's interlacing differs from the current MB's. */
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* frame neighbour -> field current: double the ref, halve vertical MV. */
520 #define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
/* field neighbour -> frame current: halve the ref, double vertical MV. */
529 #define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of 8x8-DCT neighbours, used for the transform-size context. */
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Writes the current MB's 4x4 intra prediction modes from the per-MB
 * cache back into the frame-wide intra4x4_pred_mode table (bottom row
 * and right column entries), so later MBs can read them as neighbours.
 */
546 static inline void write_back_intra_pred_mode(H264Context *h){
547 const int mb_xy= h->mb_xy;
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks the requested intra4x4 prediction modes against top/left
 * neighbour availability and rewrites unusable modes via the top[]/left[]
 * remap tables (e.g. to a DC fallback). A remapped value of -1 marks a
 * mode that cannot be repaired and is reported as an error.
 * NOTE(review): loop headers and the return statements are not visible
 * in this excerpt; code preserved as found.
 */
561 static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top row of samples unavailable: remap each top-adjacent 4x4 mode. */
567 if(!(h->top_samples_available&0x8000)){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left column: mask[] selects the availability bit per left 4x4 row. */
579 if((h->left_samples_available&0x8888)!=0x8888){
580 static const int mask[4]={0x8000,0x2000,0x80,0x20};
582 if(!(h->left_samples_available&mask[i])){
583 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
585 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
588 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
595 } //FIXME cleanup like next
598 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Checks a 16x16/chroma intra prediction mode against neighbour
 * availability, remapping directional modes to a DC fallback via the
 * top[]/left[] tables when the required samples are missing; reports an
 * error for out-of-range or unrepairable modes.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
600 static inline int check_intra_pred_mode(H264Context *h, int mode){
601 MpegEncContext * const s = &h->s;
602 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
603 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
606 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 if(!(h->top_samples_available&0x8000)){
613 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/* Partially available left column: MBAFF + constrained_intra_pred case. */
618 if((h->left_samples_available&0x8080) != 0x8080){
620 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
621 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
/**
 * Returns the predicted intra4x4 mode for block n: the minimum of the
 * left and top neighbour modes, or DC_PRED when either neighbour is
 * unavailable (encoded as a negative cache value).
 */
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/**
 * Writes the current MB's non-zero coefficient counts from the per-MB
 * cache back into the frame-wide non_zero_count table (luma bottom
 * row / right column plus the chroma entries), for use as top/left
 * neighbour context by later MBs.
 */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
668 * gets the predicted number of non-zero coefficients.
669 * @param n block index
/**
 * Predicts the number of non-zero coefficients for block n from the
 * left and top neighbour counts (used as the CAVLC nC context).
 * NOTE(review): the computation of i from left/top and the return are
 * not visible in this excerpt; code preserved as found.
 */
671 static inline int pred_non_zero_count(H264Context *h, int n){
672 const int index8= scan8[n];
673 const int left= h->non_zero_count_cache[index8 - 1];
674 const int top = h->non_zero_count_cache[index8 - 8];
677 if(i<64) i= (i+1)>>1;
679 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the "diagonal" (top-right, or top-left as fallback) motion
 * vector C used by the median MV predictor, returning its reference
 * index. The MBAFF paths rescale field/frame MVs via SET_DIAG_MV and
 * write the result into a spare mv_cache slot (scan8[0]-2).
 * @param C          receives a pointer to the chosen MV
 * @param i          scan8 index of the current block
 * @param part_width partition width in 4x4 units
 */
684 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
685 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
686 MpegEncContext *s = &h->s;
688 /* there is no consistent mapping of mvs to neighboring locations that will
689 * make mbaff happy, so we can't move all this logic to fill_caches */
691 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
693 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
694 *C = h->mv_cache[list][scan8[0]-2];
697 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
698 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
699 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: loads MV/ref at block coords (x4,y4), applying MV_OP to
 * the vertical component and REF_OP to the ref (field<->frame scaling). */
700 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
701 const int x4 = X4, y4 = Y4;\
702 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
703 if(!USES_LIST(mb_type,list))\
704 return LIST_NOT_USED;\
705 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
706 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
707 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
708 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
710 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* Top-right unavailable: try the top-left neighbour instead (MBAFF). */
713 if(topright_ref == PART_NOT_AVAILABLE
714 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
715 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
717 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
718 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
721 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
723 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
724 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF fallback: use top-right if available, else top-left. */
730 if(topright_ref != PART_NOT_AVAILABLE){
731 *C= h->mv_cache[list][ i - 8 + part_width ];
734 tprintf(s->avctx, "topright MV not available\n");
736 *C= h->mv_cache[list][ i - 8 - 1 ];
737 return h->ref_cache[list][ i - 8 - 1 ];
742 * gets the predicted MV.
743 * @param n the block index
744 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
745 * @param mx the x component of the predicted motion vector
746 * @param my the y component of the predicted motion vector
/**
 * Computes the predicted motion vector for block n (H.264 median
 * predictor over left A, top B and diagonal C neighbours; single-match
 * and availability special cases per clause 8.4.1.3).
 * @param n          block index
 * @param part_width partition width in 4x4 units (1, 2 or 4)
 * @param ref        reference index being predicted for
 * @param mx, my     receive the predicted MV components
 * NOTE(review): the single-match branches are not fully visible in this
 * excerpt; code preserved as found.
 */
748 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
749 const int index8= scan8[n];
750 const int top_ref= h->ref_cache[list][ index8 - 8 ];
751 const int left_ref= h->ref_cache[list][ index8 - 1 ];
752 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
753 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
755 int diagonal_ref, match_count;
757 assert(part_width==1 || part_width==2 || part_width==4);
767 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbours use the same reference as this partition. */
768 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
769 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
770 if(match_count > 1){ //most common
771 *mx= mid_pred(A[0], B[0], C[0]);
772 *my= mid_pred(A[1], B[1], C[1]);
773 }else if(match_count==1){
777 }else if(top_ref==ref){
/* No neighbour matches: use left MV if it is the only available one,
 * otherwise fall back to the median of all three. */
785 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= mid_pred(A[0], B[0], C[0]);
790 *my= mid_pred(A[1], B[1], C[1]);
794 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
798 * gets the directionally predicted 16x8 MV.
799 * @param n the block index
800 * @param mx the x component of the predicted motion vector
801 * @param my the y component of the predicted motion vector
/**
 * Computes the directionally predicted MV for a 16x8 partition: the top
 * partition prefers the top neighbour B, the bottom partition prefers
 * the left neighbour A (when they use the same reference); otherwise
 * falls back to the generic median predictor pred_motion().
 * @param n      block index (selects top vs bottom partition)
 * @param mx, my receive the predicted MV components
 */
803 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
805 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
806 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
808 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
816 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
817 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
819 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
829 pred_motion(h, n, 4, list, ref, mx, my);
833 * gets the directionally predicted 8x16 MV.
834 * @param n the block index
835 * @param mx the x component of the predicted motion vector
836 * @param my the y component of the predicted motion vector
/**
 * Computes the directionally predicted MV for an 8x16 partition: the
 * left partition prefers the left neighbour A, the right partition
 * prefers the diagonal neighbour C (when they use the same reference);
 * otherwise falls back to the generic median predictor pred_motion().
 * @param n      block index (selects left vs right partition)
 * @param mx, my receive the predicted MV components
 */
838 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
840 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
841 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
856 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
858 if(diagonal_ref == ref){
866 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Computes the predicted MV for a P-skip macroblock: the MV is forced to
 * zero when either the top or left neighbour is unavailable, or when one
 * of them uses reference 0 with a zero MV (H.264 P_Skip rule); otherwise
 * the generic median predictor is used.
 * @param mx, my receive the predicted MV components
 */
869 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
870 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
871 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
873 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
875 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
876 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
877 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
883 pred_motion(h, 0, 4, 0, 0, mx, my);
888 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
889 int poc0 = h->ref_list[0][i].poc;
890 int td = av_clip(poc1 - poc0, -128, 127);
891 if(td == 0 || h->ref_list[0][i].long_ref){
894 int tb = av_clip(poc - poc0, -128, 127);
895 int tx = (16384 + (FFABS(td) >> 1)) / td;
896 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the dist_scale_factor tables used by temporal direct prediction:
 * one entry per list-0 reference, plus per-field tables (indices 16+)
 * for the MBAFF/field case.
 * NOTE(review): the branch structure around the field loop is not fully
 * visible in this excerpt; code preserved as found.
 */
900 static inline void direct_dist_scale_factor(H264Context * const h){
901 MpegEncContext * const s = &h->s;
902 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
903 const int poc1 = h->ref_list[1][0].poc;
/* Per-field scale factors (entries i^field mirror the field parity). */
905 for(field=0; field<2; field++){
906 const int poc = h->s.current_picture_ptr->field_poc[field];
907 const int poc1 = h->ref_list[1][0].field_poc[field];
908 for(i=0; i < 2*h->ref_count[0]; i++)
909 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
/* Frame-level scale factors, one per list-0 reference. */
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Builds map[list]: a mapping from the colocated picture's (list-1
 * reference) own reference indices to the current picture's list-'list'
 * indices, matched via a POC key (4*frame_num + reference bits).
 * @param field    current field parity (when mbafi)
 * @param colfield field parity of the colocated picture
 * @param mbafi    nonzero for the MBAFF per-field maps (entries 16+)
 */
917 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
918 MpegEncContext * const s = &h->s;
919 Picture * const ref1 = &h->ref_list[1][0];
920 int j, old_ref, rfield;
921 int start= mbafi ? 16 : 0;
922 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
923 int interl= mbafi || s->picture_structure != PICT_FRAME;
925 /* bogus; fills in for missing frames */
926 memset(map[list], 0, sizeof(map[list]));
928 for(rfield=0; rfield<2; rfield++){
929 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
930 int poc = ref1->ref_poc[colfield][list][old_ref];
934 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
935 poc= (poc&~3) + rfield + 1;
/* Find the current-list reference with the same POC key. */
937 for(j=start; j<end; j++){
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 int cur_ref= mbafi ? (j-16)^field : j;
940 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
942 map[list][old_ref] = cur_ref;
/**
 * Records the current picture's reference lists (counts and encoded POCs)
 * into the Picture itself, then — for B slices using temporal direct —
 * builds the colocated-to-list0 maps via fill_colmap().
 * NOTE(review): some interior lines (orig 954, 957, 962-963, 967-968, 970,
 * 972-973 — declarations, braces and the early return) are elided.
 */
950 static inline void direct_ref_list_init(H264Context * const h){
951 MpegEncContext * const s = &h->s;
952 Picture * const ref1 = &h->ref_list[1][0];
953 Picture * const cur = s->current_picture_ptr;
955 int sidx= (s->picture_structure&1)^1; // 0 for frame/top, 1 for bottom field
956 int ref1sidx= (ref1->reference&1)^1;
958 for(list=0; list<2; list++){
959 cur->ref_count[sidx][list] = h->ref_count[list];
960 for(j=0; j<h->ref_count[list]; j++)
961 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3); // same encoding fill_colmap matches on
964 if(s->picture_structure == PICT_FRAME){
965 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0])); // frame: both field slots identical
966 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
969 cur->mbaff= FRAME_MBAFF;
971 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred) // maps only needed for temporal direct B slices
974 for(list=0; list<2; list++){
975 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
976 for(field=0; field<2; field++)
977 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derives motion vectors and reference indices for a B_DIRECT macroblock
 * (or the direct 8x8 sub-blocks of a B_8x8 MB), filling h->mv_cache /
 * h->ref_cache and updating *mb_type / h->sub_mb_type.
 *
 * Two modes (H.264 spec 8.4.1.2):
 *  - spatial direct: refs = min of neighbours, MVs from pred_motion(),
 *    zeroed where the colocated block is a "moving-free" zero-MV block;
 *  - temporal direct: colocated list-1 MVs scaled by dist_scale_factor.
 * Both have extra paths for mismatched field/frame coding between the
 * current MB and the colocated MB (PAFF/MBAFF).
 *
 * NOTE(review): this listing elides many interior lines (declarations of
 * mb_type_col/ref/mv/a/b/i8/i4/ref_offset, else branches and braces —
 * visible gaps in the original numbering). Comments below only describe
 * what the visible lines establish.
 */
981 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
982 MpegEncContext * const s = &h->s;
983 int b8_stride = h->b8_stride;
984 int b4_stride = h->b_stride;
985 int mb_xy = h->mb_xy; // index of the colocated MB; adjusted below for field/frame mismatches
987 const int16_t (*l1mv0)[2], (*l1mv1)[2]; // colocated list-0/list-1 motion vectors
988 const int8_t *l1ref0, *l1ref1; // colocated list-0/list-1 reference indices
989 const int is_b8x8 = IS_8X8(*mb_type);
990 unsigned int sub_mb_type;
993 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
995 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
996 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
997 int cur_poc = s->current_picture_ptr->poc;
998 int *col_poc = h->ref_list[1]->field_poc;
999 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc); // pick the temporally closer field
1000 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1003 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1004 mb_xy += s->mb_stride*fieldoff;
1007 }else{ // AFL/AFR/FR/FL -> AFR/FR
1008 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1009 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride; // top MB of the colocated frame pair
1010 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1011 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1014 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1015 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1016 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024 }else{ // AFR/FR -> AFR/FR
1027 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1028 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1029 /* FIXME save sub mb types from previous frames (or derive from MVs)
1030 * so we know exactly what block size to use */
1031 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1032 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
// point the l1* views at the colocated MB's motion data
1043 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1044 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1045 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1046 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1049 l1ref0 += h->b8_stride; // advance to second row (guard condition elided in listing)
1050 l1ref1 += h->b8_stride;
1051 l1mv0 += 2*b4_stride;
1052 l1mv1 += 2*b4_stride;
1056 if(h->direct_spatial_mv_pred){
1061 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063 /* ref = min(neighbors) */
1064 for(list=0; list<2; list++){
1065 int refa = h->ref_cache[list][scan8[0] - 1]; // left
1066 int refb = h->ref_cache[list][scan8[0] - 8]; // top
1067 int refc = h->ref_cache[list][scan8[0] - 8 + 4]; // top-right
1068 if(refc == PART_NOT_AVAILABLE)
1069 refc = h->ref_cache[list][scan8[0] - 8 - 1]; // fall back to top-left
1070 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc); // unsigned min maps "not available" (<0) above every valid ref
1075 if(ref[0] < 0 && ref[1] < 0){ // no usable neighbour in either list -> zero MV, ref 0
1076 ref[0] = ref[1] = 0;
1077 mv[0][0] = mv[0][1] =
1078 mv[1][0] = mv[1][1] = 0;
1080 for(list=0; list<2; list++){
1082 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]); // median prediction per list
1084 mv[list][0] = mv[list][1] = 0; // list unused (condition elided in listing)
1090 *mb_type &= ~MB_TYPE_L1; // drop list 1 (condition elided: ref[1] < 0)
1091 sub_mb_type &= ~MB_TYPE_L1;
1092 }else if(ref[0] < 0){
1094 *mb_type &= ~MB_TYPE_L0;
1095 sub_mb_type &= ~MB_TYPE_L0;
/* spatial direct, field/frame mismatch with the colocated MB */
1098 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1099 for(i8=0; i8<4; i8++){
1102 int xy8 = x8+y8*b8_stride;
1103 int xy4 = 3*x8+y8*b4_stride;
1106 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108 h->sub_mb_type[i8] = sub_mb_type;
1110 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1112 if(!IS_INTRA(mb_type_col[y8])
1113 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1114 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){ // colocated block is (near-)static
1116 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1120 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1123 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126 }else if(IS_16X16(*mb_type)){ /* spatial direct, whole-MB fast path */
1129 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1130 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1131 if(!IS_INTRA(mb_type_col[0])
1132 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1133 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1134 && (h->x264_build>33 || !h->x264_build)))){ // x264 <=33 workaround: skip the l1ref1 test for old encoder builds
1136 a= pack16to32(mv[0][0],mv[0][1]);
1138 b= pack16to32(mv[1][0],mv[1][1]);
1140 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
1143 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1144 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146 for(i8=0; i8<4; i8++){ /* spatial direct, per-8x8 path */
1147 const int x8 = i8&1;
1148 const int y8 = i8>>1;
1150 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152 h->sub_mb_type[i8] = sub_mb_type;
1154 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1155 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1156 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1157 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1160 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1161 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1162 && (h->x264_build>33 || !h->x264_build)))){
1163 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1164 if(IS_SUB_8X8(sub_mb_type)){
1165 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride]; // bottom-right 4x4 of the 8x8 (direct_8x8_inference)
1166 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); // zero out MVs over the static colocated block
1170 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1173 for(i4=0; i4<4; i4++){
1174 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1185 }else{ /* direct temporal mv pred */
1186 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1187 const int *dist_scale_factor = h->dist_scale_factor;
1190 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){ // field MB inside an MBAFF frame: use per-field tables
1191 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1192 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1193 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1195 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1198 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){ // temporal direct, field/frame mismatch
1199 /* FIXME assumes direct_8x8_inference == 1 */
1200 int y_shift = 2*!IS_INTERLACED(*mb_type); // vertical MV rescale between field and frame units
1202 for(i8=0; i8<4; i8++){
1203 const int x8 = i8&1;
1204 const int y8 = i8>>1;
1206 const int16_t (*l1mv)[2]= l1mv0;
1208 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210 h->sub_mb_type[i8] = sub_mb_type;
1212 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); // list-1 ref is always 0 in temporal direct
1213 if(IS_INTRA(mb_type_col[y8])){
1214 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); // intra colocated block -> zero MV, ref 0
1215 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1216 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1220 ref0 = l1ref0[x8 + y8*b8_stride];
1222 ref0 = map_col_to_list0[0][ref0 + ref_offset]; // remap colocated ref to our list 0
1224 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1227 scale = dist_scale_factor[ref0];
1228 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1231 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1232 int my_col = (mv_col[1]<<y_shift)/2; // rescale vertical component for field<->frame
1233 int mx = (scale * mv_col[0] + 128) >> 8; // mvL0 = (DistScaleFactor * mvCol + 128) >> 8
1234 int my = (scale * my_col + 128) >> 8;
1235 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1236 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); // mvL1 = mvL0 - mvCol
1242 /* one-to-one mv scaling */
1244 if(IS_16X16(*mb_type)){
1247 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1248 if(IS_INTRA(mb_type_col[0])){
1251 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1252 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1253 const int scale = dist_scale_factor[ref0];
1254 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1257 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1260 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1263 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1264 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266 for(i8=0; i8<4; i8++){ // temporal direct, per-8x8 path
1267 const int x8 = i8&1;
1268 const int y8 = i8>>1;
1270 const int16_t (*l1mv)[2]= l1mv0;
1272 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274 h->sub_mb_type[i8] = sub_mb_type;
1275 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1276 if(IS_INTRA(mb_type_col[0])){
1277 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1279 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1283 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285 ref0 = map_col_to_list0[0][ref0];
1287 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1290 scale = dist_scale_factor[ref0];
1292 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1293 if(IS_SUB_8X8(sub_mb_type)){
1294 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1295 int mx = (scale * mv_col[0] + 128) >> 8;
1296 int my = (scale * mv_col[1] + 128) >> 8;
1297 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1298 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300 for(i4=0; i4<4; i4++){ // 4x4 granularity when 8x8 inference is off
1301 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1302 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1303 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1304 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1305 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1306 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB caches (mv_cache, ref_cache, mvd_cache, sub_mb_type
 * direct flags) back into the frame-wide arrays of the current picture,
 * at 4x4 granularity for MVs and 8x8 granularity for reference indices.
 * NOTE(review): loop headers over y and some braces (orig 1317-1318, 1321,
 * 1325-1327, 1334-1335, 1338-1341, 1347-1349, 1356-1359) are elided here.
 */
1313 static inline void write_back_motion(H264Context *h, int mb_type){
1314 MpegEncContext * const s = &h->s;
1315 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; // top-left 4x4 index of this MB
1316 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; // top-left 8x8 index of this MB
1319 if(!USES_LIST(mb_type, 0))
1320 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); // mark list 0 unused for the deblocker/direct pred
1322 for(list=0; list<h->list_count; list++){
1324 if(!USES_LIST(mb_type, list))
// copy one MV row (two MVs = 64 bits) at a time from the cache
1328 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1331 if( h->pps.cabac ) {
1332 if(IS_SKIP(mb_type))
1333 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); // skipped MBs carry zero MV deltas
1336 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1337 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1342 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1343 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; // one ref per 8x8 block
1344 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1345 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1346 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1350 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){ // direct flags only needed as CABAC context in B slices
1351 if(IS_8X8(mb_type)){
1352 uint8_t *direct_table = &h->direct_table[b8_xy];
1353 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1354 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1355 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1361 * Decodes a network abstraction layer unit.
1362 * @param consumed is the number of bytes used as input
1363 * @param length is the length of the array
1364 * @param dst_length is the number of decoded bytes FIXME here or in decode_rbsp_trailing?
1365 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Unescapes one NAL unit: parses the NAL header byte, locates the first
 * 00 00 (potential escape or next start code), and removes the
 * emulation-prevention 0x03 bytes into h->rbsp_buffer.
 * Returns src+1 directly when no escapes are present (zero-copy path).
 * NOTE(review): several lines are elided from this listing (declarations,
 * loop bodies/breaks, #else/#endif of the fast-scan variants, the main
 * while(si<length) unescape loop header and its closing braces).
 */
1367 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1372 // src[0]&0x80; //forbidden bit
1373 h->nal_ref_idc= src[0]>>5; // nal_ref_idc: top 3 bits of the header byte
1374 h->nal_unit_type= src[0]&0x1F; // nal_unit_type: low 5 bits
1378 for(i=0; i<length; i++)
1379 printf("%2X ", src[i]); // debug dump (guarded by elided DEBUG conditional)
1382 #ifdef HAVE_FAST_UNALIGNED
1383 # ifdef HAVE_FAST_64BIT
// SWAR scan: the (~x & (x - lsb)) & msb trick detects a zero byte per word
1385 for(i=0; i+1<length; i+=9){
1386 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1389 for(i=0; i+1<length; i+=5){
1390 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1393 if(i>0 && !src[i]) i--; // back up onto the zero byte the word scan skipped over
1397 for(i=0; i+1<length; i+=2){ // byte-wise fallback scan for 00 00
1398 if(src[i]) continue;
1399 if(i>0 && src[i-1]==0) i--;
1401 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1403 /* startcode, so we must be past the end */
1411 if(i>=length-1){ //no escaped 0
1412 *dst_length= length;
1413 *consumed= length+1; //+1 for the header
1417 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1418 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1419 dst= h->rbsp_buffer[bufidx];
1425 //printf("decoding esc\n");
1426 memcpy(dst, src, i); // bulk-copy the escape-free prefix
1429 //remove escapes (very rare 1:2^22)
1431 dst[di++]= src[si++];
1432 dst[di++]= src[si++];
1433 }else if(src[si]==0 && src[si+1]==0){
1434 if(src[si+2]==3){ //escape: drop the 0x03, keep the two zero bytes
1439 }else //next start code
1443 dst[di++]= src[si++];
1446 dst[di++]= src[si++];
1449 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE); // zero padding so the bitreader can overread safely
1452 *consumed= si + 1;//+1 for the header
1453 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1458 * identifies the exact end of the bitstream
1459 * @return the length of the trailing, or 0 if damaged
/**
 * Locates the rbsp_stop_one_bit in the last byte to find the exact end of
 * the bitstream; returns the trailing length, or 0 if damaged.
 * NOTE(review): the body (orig lines 1462-1464 and 1466-1471) is elided
 * from this listing; only the debug trace line is visible.
 */
1461 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1465 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1475 * IDCT transforms the 16 dc values and dequantizes them.
1476 * @param qp quantization parameter
/**
 * 4x4 Hadamard inverse transform of the 16 luma DC coefficients followed
 * by dequantization; results are scattered back to the DC positions of
 * the 16 4x4 blocks (hence the stride*{0,2,8,10} / x_offset addressing).
 * Two passes: columns into temp[], then rows with (x*qmul + 128) >> 8.
 * NOTE(review): loop headers, the z0..z3 -> temp stores and closing braces
 * (orig 1479-1480, 1484, 1486-1487, 1493-1500, 1506, 1511-1512) are elided.
 */
1478 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1481 int temp[16]; //FIXME check if this is a good idea
1482 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1483 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1485 //memset(block, 64, 2*256);
// vertical pass: butterfly over the 4 DC rows
1488 const int offset= y_offset[i];
1489 const int z0= block[offset+stride*0] + block[offset+stride*4];
1490 const int z1= block[offset+stride*0] - block[offset+stride*4];
1491 const int z2= block[offset+stride*1] - block[offset+stride*5];
1492 const int z3= block[offset+stride*1] + block[offset+stride*5];
// horizontal pass: butterfly over temp[], then dequantize
1501 const int offset= x_offset[i];
1502 const int z0= temp[4*0+i] + temp[4*2+i];
1503 const int z1= temp[4*0+i] - temp[4*2+i];
1504 const int z2= temp[4*1+i] - temp[4*3+i];
1505 const int z3= temp[4*1+i] + temp[4*3+i];
1507 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1508 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1509 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1510 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1516 * DCT transforms the 16 dc values.
1517 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side,
 * mirror of h264_luma_dc_dequant_idct_c); final values are halved (>>1).
 * NOTE(review): loop headers, temp[] stores and closing braces (orig 1521,
 * 1525-1526, 1532-1539, 1545, 1550-1551) are elided from this listing.
 */
1519 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1520 // const int qmul= dequant_coeff[qp][0];
1522 int temp[16]; //FIXME check if this is a good idea
1523 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1524 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// vertical pass
1527 const int offset= y_offset[i];
1528 const int z0= block[offset+stride*0] + block[offset+stride*4];
1529 const int z1= block[offset+stride*0] - block[offset+stride*4];
1530 const int z2= block[offset+stride*1] - block[offset+stride*5];
1531 const int z3= block[offset+stride*1] + block[offset+stride*5];
// horizontal pass with >>1 normalization
1540 const int offset= x_offset[i];
1541 const int z0= temp[4*0+i] + temp[4*2+i];
1542 const int z1= temp[4*0+i] - temp[4*2+i];
1543 const int z2= temp[4*1+i] - temp[4*3+i];
1544 const int z3= temp[4*1+i] + temp[4*3+i];
1546 block[stride*0 +offset]= (z0 + z3)>>1;
1547 block[stride*2 +offset]= (z1 + z2)>>1;
1548 block[stride*8 +offset]= (z1 - z2)>>1;
1549 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients, written back to the DC positions of the 4 chroma blocks.
 * NOTE(review): the intermediate e/f sums and closing braces (orig
 * 1560-1561, 1566-1571, 1576-1577) are elided from this listing.
 */
1557 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1558 const int stride= 16*2;
1559 const int xStride= 16;
1562 a= block[stride*0 + xStride*0]; // the 4 DC values, one per 2x2 position
1563 b= block[stride*0 + xStride*1];
1564 c= block[stride*1 + xStride*0];
1565 d= block[stride*1 + xStride*1];
// 2x2 butterfly, then scale by qmul/128 (e presumably = a+... from the elided lines — TODO confirm)
1572 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1573 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1574 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1575 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the chroma DC values (encoder side,
 * mirror of chroma_dc_dequant_idct_c), without scaling.
 * NOTE(review): the intermediate e/f computations and braces (orig
 * 1582-1583, 1588-1593, 1598-1599) are elided from this listing.
 */
1579 static void chroma_dc_dct_c(DCTELEM *block){
1580 const int stride= 16*2;
1581 const int xStride= 16;
1584 a= block[stride*0 + xStride*0];
1585 b= block[stride*0 + xStride*1];
1586 c= block[stride*1 + xStride*0];
1587 d= block[stride*1 + xStride*1];
1594 block[stride*0 + xStride*0]= (a+c);
1595 block[stride*0 + xStride*1]= (e+b);
1596 block[stride*1 + xStride*0]= (a-c);
1597 block[stride*1 + xStride*1]= (e-b);
1602 * gets the chroma qp.
/**
 * Returns the chroma QP for luma qscale on chroma plane t (0=Cb, 1=Cr),
 * via the per-PPS precomputed chroma_qp_table.
 */
1604 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1605 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition from one reference picture (one list):
 * quarter-pel luma via qpix_op[], eighth-pel chroma via chroma_op, with
 * edge emulation when the motion vector reaches outside the picture.
 * n selects the partition (scan8 index); square/delta handle non-square
 * partitions done as two square ops offset by delta.
 * NOTE(review): some lines (emu declaration/assignments, field-MC guard,
 * per-chroma emu branches and braces) are elided from this listing.
 */
1608 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1609 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1610 int src_x_offset, int src_y_offset,
1611 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1612 MpegEncContext * const s = &h->s;
1613 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; // quarter-pel x, absolute in the picture
1614 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1615 const int luma_xy= (mx&3) + ((my&3)<<2); // selects one of 16 qpel interpolation functions
1616 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1617 uint8_t * src_cb, * src_cr;
1618 int extra_width= h->emu_edge_width;
1619 int extra_height= h->emu_edge_height;
1621 const int full_mx= mx>>2; // integer-pel position
1622 const int full_my= my>>2;
1623 const int pic_width = 16*s->mb_width;
1624 const int pic_height = 16*s->mb_height >> MB_FIELD; // halved height when decoding a field
1626 if(mx&7) extra_width -= 3; // fractional MVs need 2+3 filter taps of margin
1627 if(my&7) extra_height -= 3;
1629 if( full_mx < 0-extra_width
1630 || full_my < 0-extra_height
1631 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1632 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1633 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); // replicate edges into a scratch buffer
1634 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1638 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1640 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); // second half of a non-square partition
1643 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return; // grayscale decoding: skip chroma
1646 // chroma offset when predicting from a field of opposite parity
1647 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1648 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1650 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; // chroma at 1/8-pel -> integer position is >>3
1651 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cb= s->edge_emu_buffer;
1657 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); // bilinear with 1/8-pel fractions
1660 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1661 src_cr= s->edge_emu_buffer;
1663 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: predicts from list 0
 * with the "put" functions, then — for bi-prediction — from list 1 with
 * the "avg" functions, averaging into the same destination.
 * NOTE(review): the switch from put to avg ops after list 0 (orig 1687-1688,
 * assigning qpix_op/chroma_op = avg variants) is partly elided here.
 */
1666 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1667 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1668 int x_offset, int y_offset,
1669 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1670 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1671 int list0, int list1){
1672 MpegEncContext * const s = &h->s;
1673 qpel_mc_func *qpix_op= qpix_put; // first prediction writes, second averages
1674 h264_chroma_mc_func chroma_op= chroma_put;
1676 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; // offsets are in chroma units; luma is doubled
1677 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1678 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1679 x_offset += 8*s->mb_x; // make offsets absolute in the picture
1680 y_offset += 8*(s->mb_y >> MB_FIELD);
1683 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1684 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1685 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1686 qpix_op, chroma_op);
1689 chroma_op= chroma_avg; // subsequent list-1 prediction averages into dest
1693 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1694 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1695 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1696 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition.
 * Bi-prediction (list0 && list1): both references are predicted separately
 * (list 1 into the obmc scratchpad) and combined with either implicit
 * weights (use_weight==2) or explicit bi-weights. Uni-prediction: a single
 * prediction followed by in-place explicit weighting.
 * NOTE(review): the if(list0 && list1){...}else{...} structure's braces and
 * a few lines are elided from this listing.
 */
1700 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1701 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1702 int x_offset, int y_offset,
1703 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1704 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1705 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1706 int list0, int list1){
1707 MpegEncContext * const s = &h->s;
1709 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1710 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1711 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1712 x_offset += 8*s->mb_x;
1713 y_offset += 8*(s->mb_y >> MB_FIELD);
1716 /* don't optimize for luma-only case, since B-frames usually
1717 * use implicit weights => chroma too. */
1718 uint8_t *tmp_cb = s->obmc_scratchpad; // list-1 prediction goes to scratch, then gets blended in
1719 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1720 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1721 int refn0 = h->ref_cache[0][ scan8[n] ];
1722 int refn1 = h->ref_cache[1][ scan8[n] ];
1724 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1725 dest_y, dest_cb, dest_cr,
1726 x_offset, y_offset, qpix_put, chroma_put);
1727 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1728 tmp_y, tmp_cb, tmp_cr,
1729 x_offset, y_offset, qpix_put, chroma_put);
1731 if(h->use_weight == 2){ // implicit weighting: weights derived from POC distances, denom fixed at 5
1732 int weight0 = h->implicit_weight[refn0][refn1];
1733 int weight1 = 64 - weight0;
1734 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, // explicit bi-weighting from the slice header
1739 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1740 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1741 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1742 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1743 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1744 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1745 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1746 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1749 int list = list1 ? 1 : 0; // uni-prediction: weight the single prediction in place
1750 int refn = h->ref_cache[list][ scan8[n] ];
1751 Picture *ref= &h->ref_list[list][refn];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_put, chroma_put);
1756 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1757 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1758 if(h->use_weight_chroma){
1759 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1760 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1761 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition to weighted or standard motion compensation:
 * weighted when explicit weighting is on (use_weight==1) or when implicit
 * bi-prediction weights differ from the trivial 32/32 average.
 */
1767 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1768 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1769 int x_offset, int y_offset,
1770 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1771 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1772 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1773 int list0, int list1){
1774 if((h->use_weight==2 && list0 && list1
1775 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) // 32/32 implicit == plain average -> take the fast path
1776 || h->use_weight==1)
1777 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1778 x_offset, y_offset, qpix_put, chroma_put,
1779 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); // [0]=luma size, [3]=matching chroma size
1781 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1782 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference picture at the position the
 * MB's first MV points to, a few MBs ahead of the current decode position.
 * NOTE(review): the guard around refn (orig 1790) is elided; presumably it
 * skips the prefetch for unavailable refs — TODO confirm.
 */
1785 static inline void prefetch_motion(H264Context *h, int list){
1786 /* fetch pixels for estimated mv 4 macroblocks ahead
1787 * optimized for 64byte cache lines */
1788 MpegEncContext * const s = &h->s;
1789 const int refn = h->ref_cache[list][scan8[0]];
1791 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; // integer-pel target, centered
1792 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1793 uint8_t **src= h->ref_list[list][refn].data;
1794 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; // stagger by mb_x so successive MBs touch different lines
1795 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1796 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1797 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); // Cb and Cr in one call via the inter-plane stride
/**
 * Performs full inter prediction for one macroblock: decodes the partition
 * layout from mb_type (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and
 * issues one mc_part() per partition with the matching block-size function
 * tables ([0]=16-wide, [1]=8-wide, [2]=4-wide) and weight-op indices.
 * NOTE(review): the else branch opening, the for(i) loop header over the
 * four 8x8 blocks, n = 4*i, and several closing braces (orig 1836-1838,
 * 1840-1841, 1843, 1870-1884) are elided from this listing.
 */
1801 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1802 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1803 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1804 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1805 MpegEncContext * const s = &h->s;
1806 const int mb_xy= h->mb_xy;
1807 const int mb_type= s->current_picture.mb_type[mb_xy];
1809 assert(IS_INTER(mb_type));
1811 prefetch_motion(h, 0); // start pulling list-0 reference data early
1813 if(IS_16X16(mb_type)){
1814 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1815 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1816 &weight_op[0], &weight_avg[0],
1817 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1818 }else if(IS_16X8(mb_type)){ // two 16x8 halves, delta=8 rows
1819 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1820 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1821 &weight_op[1], &weight_avg[1],
1822 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1823 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1824 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1825 &weight_op[1], &weight_avg[1],
1826 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1827 }else if(IS_8X16(mb_type)){ // two 8x16 halves, delta = 8 luma lines
1828 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1829 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1830 &weight_op[2], &weight_avg[2],
1831 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1832 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1834 &weight_op[2], &weight_avg[2],
1835 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1839 assert(IS_8X8(mb_type)); // remaining case: per-8x8 sub-partitions (loop header elided)
1842 const int sub_mb_type= h->sub_mb_type[i];
1844 int x_offset= (i&1)<<2; // 8x8 position in chroma units
1845 int y_offset= (i&2)<<1;
1847 if(IS_SUB_8X8(sub_mb_type)){
1848 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1849 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1850 &weight_op[3], &weight_avg[3],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1852 }else if(IS_SUB_8X4(sub_mb_type)){
1853 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1854 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1855 &weight_op[4], &weight_avg[4],
1856 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1858 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1859 &weight_op[4], &weight_avg[4],
1860 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1861 }else if(IS_SUB_4X8(sub_mb_type)){
1862 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1863 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1864 &weight_op[5], &weight_avg[5],
1865 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1867 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1868 &weight_op[5], &weight_avg[5],
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1872 assert(IS_SUB_4X4(sub_mb_type)); // last case: four 4x4 blocks (inner loop header elided)
1874 int sub_x_offset= x_offset + 2*(j&1);
1875 int sub_y_offset= y_offset + (j&2);
1876 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1877 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1878 &weight_op[6], &weight_avg[6],
1879 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1885 prefetch_motion(h, 1); // prefetch list-1 data for the next MB's bi-pred
/**
 * Precomputes cavlc_level_tab: for each CAVLC level suffix length and each
 * LEVEL_TAB_BITS-bit bitstream prefix, stores the decoded level ([0]) and
 * the number of bits consumed ([1]). Entries whose code does not fit in
 * LEVEL_TAB_BITS store prefix+100 / LEVEL_TAB_BITS+100 as escape markers
 * so the decoder falls back to the slow path.
 * NOTE(review): a few lines (orig 1890-1891, 1896, 1905, 1908-1911 —
 * declarations, an if, and closing braces) are elided from this listing.
 */
1888 static av_cold void init_cavlc_level_tab(void){
1889 int suffix_length, mask;
1892 for(suffix_length=0; suffix_length<7; suffix_length++){
1893 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1894 int prefix= LEVEL_TAB_BITS - av_log2(2*i); // count of leading zeros in the bit pattern
1895 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1897 mask= -(level_code&1); // branchless zigzag: map code to signed level
1898 level_code= (((2+level_code)>>1) ^ mask) - mask;
1899 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ // whole code fits -> store final level
1900 cavlc_level_tab[suffix_length][i][0]= level_code;
1901 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1902 }else if(prefix + 1 <= LEVEL_TAB_BITS){ // only the prefix fits -> escape with prefix info
1903 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1904 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1906 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100; // nothing fits -> full escape
1907 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run_before and their chroma-DC variants),
 * using preallocated static storage via INIT_VLC_USE_NEW_STATIC.
 * Guarded by the static 'done' flag so it runs only once per process. */
1913 static av_cold void decode_init_vlc(void){
1914 static int done = 0;
1921 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1922 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1923 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1924 &chroma_dc_coeff_token_len [0], 1, 1,
1925 &chroma_dc_coeff_token_bits[0], 1, 1,
1926 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token tables are packed back to back in one static array;
 * 'offset' walks through the per-table sizes */
1930 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1931 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1932 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1933 &coeff_token_len [i][0], 1, 1,
1934 &coeff_token_bits[i][0], 1, 1,
1935 INIT_VLC_USE_NEW_STATIC);
1936 offset += coeff_token_vlc_tables_size[i];
1939 * This is a one time safety check to make sure that
1940 * the packed static coeff_token_vlc table sizes
1941 * were initialized correctly.
1943 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1946 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1947 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1948 init_vlc(&chroma_dc_total_zeros_vlc[i],
1949 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1950 &chroma_dc_total_zeros_len [i][0], 1, 1,
1951 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1952 INIT_VLC_USE_NEW_STATIC);
1954 for(i=0; i<15; i++){
1955 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1956 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1957 init_vlc(&total_zeros_vlc[i],
1958 TOTAL_ZEROS_VLC_BITS, 16,
1959 &total_zeros_len [i][0], 1, 1,
1960 &total_zeros_bits[i][0], 1, 1,
1961 INIT_VLC_USE_NEW_STATIC);
1965 run_vlc[i].table = run_vlc_tables[i];
1966 run_vlc[i].table_allocated = run_vlc_tables_size;
1967 init_vlc(&run_vlc[i],
1969 &run_len [i][0], 1, 1,
1970 &run_bits[i][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
/* NOTE(review): trailing ',' below is the comma operator, not a typo in
 * effect — it behaves like ';' here, but ';' would be conventional. */
1973 run7_vlc.table = run7_vlc_table,
1974 run7_vlc.table_allocated = run7_vlc_table_size;
1975 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1976 &run_len [6][0], 1, 1,
1977 &run_bits[6][0], 1, 1,
1978 INIT_VLC_USE_NEW_STATIC);
1980 init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread top_borders and obmc scratchpads of every
 * thread context. av_freep() NULLs the pointers, so double-free is safe. */
1984 static void free_tables(H264Context *h){
1987 av_freep(&h->intra4x4_pred_mode);
1988 av_freep(&h->chroma_pred_mode_table);
1989 av_freep(&h->cbp_table);
1990 av_freep(&h->mvd_table[0]);
1991 av_freep(&h->mvd_table[1]);
1992 av_freep(&h->direct_table);
1993 av_freep(&h->non_zero_count);
1994 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
1995 h->slice_table= NULL;
1997 av_freep(&h->mb2b_xy);
1998 av_freep(&h->mb2b8_xy);
2000 for(i = 0; i < h->s.avctx->thread_count; i++) {
2001 hx = h->thread_context[i];
2003 av_freep(&hx->top_borders[1]);
2004 av_freep(&hx->top_borders[0]);
2005 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables for both scaling matrices and all
 * 52 QP values. If both PPS 8x8 scaling matrices are identical, table 1
 * simply aliases table 0 to save work. When the platform IDCT is not the
 * C reference one, coefficients are stored transposed to match its
 * expected permutation (see the FIXME). */
2009 static void init_dequant8_coeff_table(H264Context *h){
2011 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2012 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2013 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2015 for(i=0; i<2; i++ ){
/* identical matrices -> share buffer 0 and skip recomputation */
2016 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2017 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2021 for(q=0; q<52; q++){
2022 int shift = div6[q];
2025 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2026 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2027 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables for the six scaling matrices and all
 * 52 QP values. Matrices that are byte-identical to an earlier one alias
 * the earlier buffer instead of being recomputed. Coefficients are stored
 * transposed when the platform IDCT differs from the C reference. */
2032 static void init_dequant4_coeff_table(H264Context *h){
2034 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2035 for(i=0; i<6; i++ ){
2036 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier identical matrix's buffer if possible */
2038 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2039 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2046 for(q=0; q<52; q++){
2047 int shift = div6[q] + 2;
2050 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2051 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2052 h->pps.scaling_matrix4[i][x]) << shift;
/* Initialize all dequantization tables: always the 4x4 set, the 8x8 set
 * only when the PPS enables 8x8 transforms. With lossless transform
 * bypass (qpprime_y_zero_transform_bypass), QP 0 entries are forced to
 * the identity scale 1<<6 so dequantization becomes a no-op. */
2057 static void init_dequant_tables(H264Context *h){
2059 init_dequant4_coeff_table(h);
2060 if(h->pps.transform_8x8_mode)
2061 init_dequant8_coeff_table(h);
2062 if(h->sps.transform_bypass){
2065 h->dequant4_coeff[i][0][x] = 1<<6;
2066 if(h->pps.transform_8x8_mode)
2069 h->dequant8_coeff[i][0][x] = 1<<6;
2076 * needs width/height
/* Allocate all per-picture-size decoder tables (prediction modes,
 * non-zero-count, slice table, CBP, MVD, direct-mode, and the
 * mb-index -> b/b8-index maps), then initialize the dequant tables if
 * not yet done. Returns 0 on success; CHECKED_ALLOCZ jumps to the
 * cleanup path on allocation failure. */
2078 static int alloc_tables(H264Context *h){
2079 MpegEncContext * const s = &h->s;
/* one extra mb row as guard band for edge macroblock accesses */
2080 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2083 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2085 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2086 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2087 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2089 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2090 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2091 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2092 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset into the base so that
 * out-of-frame neighbor lookups land in the guard band */
2094 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2095 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2097 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2098 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2099 for(y=0; y<s->mb_height; y++){
2100 for(x=0; x<s->mb_width; x++){
2101 const int mb_xy= x + y*s->mb_stride;
2102 const int b_xy = 4*x + 4*y*h->b_stride;
2103 const int b8_xy= 2*x + 2*y*h->b8_stride;
2105 h->mb2b_xy [mb_xy]= b_xy;
2106 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated lazily in frame_start() once linesize is known */
2110 s->obmc_scratchpad = NULL;
2112 if(!h->dequant4_coeff[0])
2113 init_dequant_tables(h);
2122 * Mimic alloc_tables(), but for every context thread.
/* Share the big tables allocated by alloc_tables() with a per-thread
 * context instead of reallocating them; only the obmc scratchpad stays
 * per-thread (reset to NULL, allocated lazily in frame_start()). */
2124 static void clone_tables(H264Context *dst, H264Context *src){
2125 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2126 dst->non_zero_count = src->non_zero_count;
2127 dst->slice_table = src->slice_table;
2128 dst->cbp_table = src->cbp_table;
2129 dst->mb2b_xy = src->mb2b_xy;
2130 dst->mb2b8_xy = src->mb2b8_xy;
2131 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2132 dst->mvd_table[0] = src->mvd_table[0];
2133 dst->mvd_table[1] = src->mvd_table[1];
2134 dst->direct_table = src->direct_table;
2136 dst->s.obmc_scratchpad = NULL;
2137 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2142 * Allocate buffers which are not shared amongst multiple threads.
/* Per-thread allocation: one row of top-border samples (16 luma + 8+8
 * chroma bytes per mb) for each of the two parity rows. Returns -1 on
 * allocation failure; free_tables() releases partial allocations. */
2144 static int context_init(H264Context *h){
2145 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2146 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2150 return -1; // free_tables will clean up for us
/* Initialization shared by decoder (and encoder) init paths: copy
 * dimensions/codec id from the AVCodecContext, set up intra prediction,
 * init the DSP functions early (the dequant tables need the IDCT
 * permutation), and default both scaling matrix sets to flat 16. */
2153 static av_cold void common_init(H264Context *h){
2154 MpegEncContext * const s = &h->s;
2156 s->width = s->avctx->width;
2157 s->height = s->avctx->height;
2158 s->codec_id= s->avctx->codec->id;
2160 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS dequant tables computed yet" */
2162 h->dequant_coeff_pps= -1;
2163 s->unrestricted_mv=1;
2164 s->decode=1; //FIXME
2166 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2168 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2169 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the
 * output pixel format (full-range YUVJ420P for SVQ3, YUV420P otherwise),
 * detect AVC-style ("mp4"/length-prefixed) extradata, and initialize
 * POC bookkeeping for the first frame. */
2172 static av_cold int decode_init(AVCodecContext *avctx){
2173 H264Context *h= avctx->priv_data;
2174 MpegEncContext * const s = &h->s;
2176 MPV_decode_defaults(s);
2181 s->out_format = FMT_H264;
2182 s->workaround_bugs= avctx->workaround_bugs;
2185 // s->decode_mb= ff_h263_decode_mb;
2186 s->quarter_sample = 1;
2189 if(avctx->codec_id == CODEC_ID_SVQ3)
2190 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2192 avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with 1 => avcC (AVC configuration record) format */
2196 if(avctx->extradata_size > 0 && avctx->extradata &&
2197 *(char *)avctx->extradata == 1){
2204 h->thread_context[0] = h;
2205 h->outputed_poc = INT_MIN;
/* sentinel larger than any real poc_msb so first-frame POC math works */
2206 h->prev_poc_msb= 1<<16;
/* Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset tables (frame and field variants), lazily allocate the
 * per-thread bipred scratchpads, and reset slice-table/POC/reference
 * state for the new picture. Returns <0 if MPV_frame_start fails. */
2210 static int frame_start(H264Context *h){
2211 MpegEncContext * const s = &h->s;
2214 if(MPV_frame_start(s, s->avctx) < 0)
2216 ff_er_frame_start(s);
2218 * MPV_frame_start uses pict_type to derive key_frame.
2219 * This is incorrect for H.264; IDR markings must be used.
2220 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2221 * See decode_nal_units().
2223 s->current_picture_ptr->key_frame= 0;
2225 assert(s->linesize && s->uvlinesize);
/* luma offsets: [0..15] frame (4*linesize rows), [24..39] field (8*linesize) */
2227 for(i=0; i<16; i++){
2228 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2229 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets, cb and cr share the same values */
2232 h->block_offset[16+i]=
2233 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2234 h->block_offset[24+16+i]=
2235 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2238 /* can't be in alloc_tables because linesize isn't known there.
2239 * FIXME: redo bipred weight to not require extra buffer? */
2240 for(i = 0; i < s->avctx->thread_count; i++)
2241 if(!h->thread_context[i]->s.obmc_scratchpad)
2242 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2244 /* some macroblocks will be accessed before they're available */
2245 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2246 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2248 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2250 // We mark the current picture as non-reference after allocating it, so
2251 // that if we break out due to an error it can be released automatically
2252 // in the next MPV_frame_start().
2253 // SVQ3 as well as most other codecs have only last/next/current and thus
2254 // get released even with set reference, besides SVQ3 and others do not
2255 // mark frames as reference later "naturally".
2256 if(s->codec_id != CODEC_ID_SVQ3)
2257 s->current_picture_ptr->reference= 0;
/* INT_MAX sentinel: real field POCs are filled in while parsing slices */
2259 s->current_picture_ptr->field_poc[0]=
2260 s->current_picture_ptr->field_poc[1]= INT_MAX;
2261 assert(s->current_picture_ptr->long_ref==0);
/* Save the bottom row(s) and right-edge columns of the just-decoded
 * macroblock into top_borders[]/left_border[] so the deblocking filter
 * and intra prediction of neighboring MBs can read the unfiltered
 * samples. The MBAFF path saves per-field lines with interleaved step. */
2266 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2267 MpegEncContext * const s = &h->s;
2276 src_cb -= uvlinesize;
2277 src_cr -= uvlinesize;
2279 if(!simple && FRAME_MBAFF){
2281 offset = MB_MBAFF ? 1 : 17;
2282 uvoffset= MB_MBAFF ? 1 : 9;
2284 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
/* skip chroma in gray-only decoding mode */
2286 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2293 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2294 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2296 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2302 top_idx = MB_MBAFF ? 0 : 1;
2304 step= MB_MBAFF ? 2 : 1;
2307 // There are two lines saved, the line above the top macroblock of a pair,
2308 // and the line above the bottom macroblock
2309 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2310 for(i=1; i<17 - skiplast; i++){
2311 h->left_border[offset+i*step]= src_y[15+i* linesize];
2314 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2315 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2317 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2318 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2319 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2320 for(i=1; i<9 - skiplast; i++){
2321 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2322 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2324 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2325 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
 * with the current picture edges around intra prediction, so prediction
 * reads pre-deblocking neighbor pixels. Edges shared with another slice
 * are only exchanged when the filter crosses slice boundaries. */
2329 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2330 MpegEncContext * const s = &h->s;
2341 if(!simple && FRAME_MBAFF){
2343 offset = MB_MBAFF ? 1 : 17;
2344 uvoffset= MB_MBAFF ? 1 : 9;
2348 top_idx = MB_MBAFF ? 0 : 1;
2350 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: filter stops at slice boundaries, so only
 * exchange borders belonging to the same slice */
2353 if(h->deblocking_filter == 2) {
2355 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2356 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2358 deblock_left = (s->mb_x > 0);
2359 deblock_top = (s->mb_y > !!MB_FIELD);
2362 src_y -= linesize + 1;
2363 src_cb -= uvlinesize + 1;
2364 src_cr -= uvlinesize + 1;
2366 #define XCHG(a,b,t,xchg)\
2373 for(i = !deblock_top; i<16; i++){
2374 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2376 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2380 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2381 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbor samples for the last column of 4x4 predictions */
2382 if(s->mb_x+1 < s->mb_width){
2383 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2387 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2389 for(i = !deblock_top; i<8; i++){
2390 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2391 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2393 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2394 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: intra prediction
 * or inter motion compensation, residual IDCT+add for luma and chroma,
 * then deblocking-filter bookkeeping. 'simple' (compile-time constant via
 * av_always_inline) strips interlaced/MBAFF/PCM/gray/SVQ3 handling so
 * the common progressive-H.264 path compiles to lean code. */
2403 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2404 MpegEncContext * const s = &h->s;
2405 const int mb_x= s->mb_x;
2406 const int mb_y= s->mb_y;
2407 const int mb_xy= h->mb_xy;
2408 const int mb_type= s->current_picture.mb_type[mb_xy];
2409 uint8_t *dest_y, *dest_cb, *dest_cr;
2410 int linesize, uvlinesize /*dct_offset*/;
2412 int *block_offset = &h->block_offset[0];
2413 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2414 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2415 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2416 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2418 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2419 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2420 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2422 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2423 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use field block offsets, and for the
 * bottom MB of a pair move the dest pointers up to the field start */
2425 if (!simple && MB_FIELD) {
2426 linesize = h->mb_linesize = s->linesize * 2;
2427 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2428 block_offset = &h->block_offset[24];
2429 if(mb_y&1){ //FIXME move out of this function?
2430 dest_y -= s->linesize*15;
2431 dest_cb-= s->uvlinesize*7;
2432 dest_cr-= s->uvlinesize*7;
2436 for(list=0; list<h->list_count; list++){
2437 if(!USES_LIST(mb_type, list))
/* re-encode ref_cache so field parity is part of the reference index */
2439 if(IS_16X16(mb_type)){
2440 int8_t *ref = &h->ref_cache[list][scan8[0]];
2441 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2443 for(i=0; i<16; i+=4){
2444 int ref = h->ref_cache[list][scan8[i]];
2446 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2452 linesize = h->mb_linesize = s->linesize;
2453 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2454 // dct_offset = s->linesize * 16;
/* IPCM: raw samples were stored in h->mb; just copy them out */
2457 if (!simple && IS_INTRA_PCM(mb_type)) {
2458 for (i=0; i<16; i++) {
2459 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2461 for (i=0; i<8; i++) {
2462 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2463 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2466 if(IS_INTRA(mb_type)){
2467 if(h->deblocking_filter)
2468 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2470 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2471 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2472 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2475 if(IS_INTRA4x4(mb_type)){
2476 if(simple || !s->encoding){
2477 if(IS_8x8DCT(mb_type)){
2478 if(transform_bypass){
2480 idct_add = s->dsp.add_pixels8;
2482 idct_dc_add = s->dsp.h264_idct8_dc_add;
2483 idct_add = s->dsp.h264_idct8_add;
2485 for(i=0; i<16; i+=4){
2486 uint8_t * const ptr= dest_y + block_offset[i];
2487 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* High 4:4:4 lossless: prediction fused with residual add */
2488 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2489 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2491 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2492 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2493 (h->topright_samples_available<<i)&0x4000, linesize);
/* single DC coefficient: cheaper dc-only add */
2495 if(nnz == 1 && h->mb[i*16])
2496 idct_dc_add(ptr, h->mb + i*16, linesize);
2498 idct_add (ptr, h->mb + i*16, linesize);
2503 if(transform_bypass){
2505 idct_add = s->dsp.add_pixels4;
2507 idct_dc_add = s->dsp.h264_idct_dc_add;
2508 idct_add = s->dsp.h264_idct_add;
2510 for(i=0; i<16; i++){
2511 uint8_t * const ptr= dest_y + block_offset[i];
2512 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2514 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2515 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* diagonal modes need top-right samples; if unavailable, replicate
 * the last available top sample across the 4-byte topright buffer */
2519 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2520 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2521 assert(mb_y || linesize <= block_offset[i]);
2522 if(!topright_avail){
2523 tr= ptr[3 - linesize]*0x01010101;
2524 topright= (uint8_t*) &tr;
2526 topright= ptr + 4 - linesize;
2530 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2531 nnz = h->non_zero_count_cache[ scan8[i] ];
2534 if(nnz == 1 && h->mb[i*16])
2535 idct_dc_add(ptr, h->mb + i*16, linesize);
2537 idct_add (ptr, h->mb + i*16, linesize);
2539 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2546 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2548 if(!transform_bypass)
2549 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2551 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2553 if(h->deblocking_filter)
2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation */
2556 hl_motion(h, dest_y, dest_cb, dest_cr,
2557 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2558 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2559 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residual */
2563 if(!IS_INTRA4x4(mb_type)){
2565 if(IS_INTRA16x16(mb_type)){
2566 if(transform_bypass){
2567 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2568 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2570 for(i=0; i<16; i++){
2571 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2572 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2576 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2578 }else if(h->cbp&15){
2579 if(transform_bypass){
2580 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2581 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2582 for(i=0; i<16; i+=di){
2583 if(h->non_zero_count_cache[ scan8[i] ]){
2584 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2588 if(IS_8x8DCT(mb_type)){
2589 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2591 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2596 for(i=0; i<16; i++){
2597 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2598 uint8_t * const ptr= dest_y + block_offset[i];
2599 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* add chroma residual (cbp bits 4-5) */
2605 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2606 uint8_t *dest[2] = {dest_cb, dest_cr};
2607 if(transform_bypass){
2608 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2609 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2610 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2612 idct_add = s->dsp.add_pixels4;
2613 for(i=16; i<16+8; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2615 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2619 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2620 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2622 idct_add = s->dsp.h264_idct_add;
2623 idct_dc_add = s->dsp.h264_idct_dc_add;
2624 for(i=16; i<16+8; i++){
2625 if(h->non_zero_count_cache[ scan8[i] ])
2626 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2627 else if(h->mb[i*16])
2628 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2633 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2634 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2641 if(h->cbp || IS_INTRA(mb_type))
2642 s->dsp.clear_blocks(h->mb);
2644 if(h->deblocking_filter) {
2645 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2646 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2647 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2648 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2649 if (!simple && FRAME_MBAFF) {
2650 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2652 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2658 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 specialization of hl_decode_mb_internal() (progressive H.264) */
2660 static void hl_decode_mb_simple(H264Context *h){
2661 hl_decode_mb_internal(h, 1);
2665 * Process a macroblock; this handles edge cases, such as interlacing.
/* simple=0 specialization; av_noinline keeps the rare path out of the
 * hot simple path's code size */
2667 static void av_noinline hl_decode_mb_complex(H264Context *h){
2668 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant
 * depending on stream features (MBAFF/interlacing, IPCM, lossless, or a
 * small build where only the complex path exists). */
2671 static void hl_decode_mb(H264Context *h){
2672 MpegEncContext * const s = &h->s;
2673 const int mb_xy= h->mb_xy;
2674 const int mb_type= s->current_picture.mb_type[mb_xy];
2675 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2677 if(ENABLE_H264_ENCODER && !s->decode)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view in place: for the
 * bottom field offset the plane pointers by one line, double the
 * linesizes so rows skip the other field, set the field-parity reference
 * mark and pick the matching field POC. */
2685 static void pic_as_field(Picture *pic, const int parity){
2687 for (i = 0; i < 4; ++i) {
2688 if (parity == PICT_BOTTOM_FIELD)
2689 pic->data[i] += pic->linesize[i];
2690 pic->reference = parity;
2691 pic->linesize[i] *= 2;
2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy 'src' into 'dest' as a field (or whole frame) reference if its
 * reference mask matches 'parity'; adjust pic_id by id_add. Returns
 * nonzero when a copy was made (src matched the requested parity). */
2696 static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2702 if(parity != PICT_FRAME){
2703 pic_as_field(dest, parity);
2705 dest->pic_id += id_add;
/* Build part of a default reference list from 'in' (short- or long-term
 * pictures), alternating between fields of the selected parity 'sel' and
 * the opposite parity per the H.264 field-reference ordering rules.
 * pic_id becomes the long-term index or the frame_num. Returns the
 * number of entries written to 'def'. */
2712 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2716 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture referencing the wanted parity */
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Append up to 'len' pictures from 'src' to 'sorted', ordered by POC:
 * dir=0 selects POCs below 'limit' in descending order, dir=1 selects
 * POCs above 'limit' in ascending order (selection sort, picking the
 * best remaining candidate each pass). Returns the count appended. */
2734 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2739 best_poc= dir ? INT_MIN : INT_MAX;
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2745 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate left on this side of 'limit' */
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2750 limit= sorted[out_i++]->poc - dir;
2756 * fills the default_ref_list.
/* Build the default reference picture lists per H.264 8.2.4.2:
 * for B slices, list0 sorts short-term refs by POC distance (past then
 * future), list1 the reverse, followed by long-term refs; for P slices a
 * single list in decoding order. Lists are zero-padded up to ref_count. */
2758 static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2762 if(h->slice_type_nos==FF_B_TYPE){
2763 Picture *sorted[32];
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2770 cur_poc= s->current_picture_ptr->poc;
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if list1 would equal list0, swap its first two entries.
 * NOTE(review): the && order below reads data[0] before checking
 * i<lens[0] — appears to rely on the list being zero-padded; verify. */
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2798 for (i=0; i<h->ref_count[0]; i++) {
2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2801 if(h->slice_type_nos==FF_B_TYPE){
2802 for (i=0; i<h->ref_count[1]; i++) {
2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2810 static void print_short_term(H264Context *h);
2811 static void print_long_term(H264Context *h);
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2823 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
/* default: same structure as the current picture; the (elided) branch
 * below flips it for opposite-parity field references */
2826 *structure = s->picture_structure;
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it:
 * starting from the default lists, each reordering command moves the
 * referenced short-term (idc 0/1) or long-term (idc 2) picture to the
 * current index, shifting the rest down. Missing references are replaced
 * with the current picture as a last resort. Returns 0 on success,
 * -1 on a bitstream error. */
2837 static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
2839 int list, index, pic_structure;
2841 print_short_term(h);
2844 for(list=0; list<h->list_count; list++){
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag */
2847 if(get_bits1(&s->gb)){
2848 int pred= h->curr_pic_num;
2850 for(index=0; ; index++){
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2852 unsigned int pic_id;
2854 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
2856 if(reordering_of_pic_nums_idc==3)
2859 if(index >= h->ref_count[list]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
/* short-term: pic num predicted +/- abs_diff_pic_num, modulo max_pic_num */
2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2869 if(abs_diff_pic_num > h->max_pic_num){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
2876 pred &= h->max_pic_num - 1;
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
2882 assert(ref->reference);
2883 assert(!ref->long_ref);
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
/* long-term reference */
2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
2904 if(ref && (ref->reference & pic_structure)){
2905 ref->pic_id= pic_id;
2906 assert(ref->long_ref);
2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* find the old position of this ref (if any), shift entries down,
 * and insert the reordered ref at the current index */
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2924 h->ref_list[list][index]= *ref;
2926 pic_as_field(&h->ref_list[list][index], pic_structure);
2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* final sanity pass: fill any hole with the current picture */
2936 for(list=0; list<h->list_count; list++){
2937 for(index= 0; index < h->ref_count[list]; index++){
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries: each frame ref
 * i gets a top-field copy at index 16+2*i and a bottom-field copy at
 * 16+2*i+1 (doubled linesize, data offset for the bottom field), and the
 * explicit/implicit weights are duplicated for the field entries. */
2948 static void fill_mbaff_ref_list(H264Context *h){
2950 for(list=0; list<2; list++){ //FIXME try list_count
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
2956 field[0].linesize[j] <<= 1;
2957 field[0].reference = PICT_TOP_FIELD;
2958 field[0].poc= field[0].field_poc[0];
2959 field[1] = field[0];
2961 field[1].data[j] += frame->linesize[j];
2962 field[1].reference = PICT_BOTTOM_FIELD;
2963 field[1].poc= field[1].field_poc[1];
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, falling back to the default
 * (1<<log2_denom, offset 0) when a weight flag is absent. Sets
 * use_weight/use_weight_chroma when any non-default weight appears.
 * List 1 is parsed only for B slices. */
2981 static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2984 int luma_def, chroma_def;
2987 h->use_weight_chroma= 0;
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
3029 if(h->slice_type_nos != FF_B_TYPE) break;
3031 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initialize the implicit weighted-prediction table for B slices
 * (weighted_bipred_idc == 2): weights are derived from the POC distances
 * between the current picture and each (ref0, ref1) pair, per
 * H.264 spec 8.4.2.3.2.
 * NOTE(review): the guard against td == 0 before the division at 3059
 * appears to be elided from this view — confirm against the full source.
 */
3035 static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
3038 int cur_poc = s->current_picture_ptr->poc;
/* Fast path: a single ref in each list, POC-symmetric around the current
 * picture -> implicit weighting degenerates to the default (disabled). */
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3043 h->use_weight_chroma= 0;
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3055 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation. */
3056 int td = av_clip(poc1 - poc0, -128, 127);
3058 int tb = av_clip(cur_poc - poc0, -128, 127);
3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* Out-of-range scale factors fall back to the neutral weight 32. */
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3066 h->implicit_weight[ref0][ref1] = 32;
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes) zero if one of the fields remains in
3082 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* Intentional assignment-in-condition: clear the masked field bits and
 * test whether any reference bit survives. */
3084 if (pic->reference &= refmask) {
/* Fully unreferenced: if the pic is still queued for output, keep it
 * alive with the special DELAYED_PIC_REF marking. */
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
3089 pic->reference=DELAYED_PIC_REF;
3097 * instantaneous decoder refresh.
/* Empty the whole DPB reference state: drop all long-term and short-term
 * references and reset frame_num tracking, as required after an IDR NAL. */
3099 static void idr(H264Context *h){
3102 for(i=0; i<16; i++){
3103 remove_long(h, i, 0);
3105 assert(h->long_ref_count==0);
3107 for(i=0; i<h->short_ref_count; i++){
3108 unreference_pic(h, h->short_ref[i], 0);
3109 h->short_ref[i]= NULL;
3111 h->short_ref_count=0;
/* IDR resets frame-number prediction state. */
3112 h->prev_frame_num= 0;
3113 h->prev_frame_num_offset= 0;
3118 /* forget old pics after a seek */
/* avctx->flush() callback: drop delayed-output pictures, the current
 * picture, and (via elided idr()/mpeg flush calls) all reference state. */
3119 static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
3125 h->delayed_pic[i]= NULL;
/* INT_MIN acts as "no picture output yet" sentinel for POC ordering. */
3127 h->outputed_poc= INT_MIN;
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
3131 h->s.first_field= 0;
3132 ff_mpeg_flush(avctx);
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
3143 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3144 MpegEncContext * const s = &h->s;
/* Linear scan; short_ref_count is bounded by the DPB size so this is cheap. */
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
3149 if(s->avctx->debug&FF_DEBUG_MMCO)
3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3151 if(pic->frame_num == frame_num) {
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3165 static void remove_short_at_index(H264Context *h, int i){
3166 assert(i >= 0 && i < h->short_ref_count);
3167 h->short_ref[i]= NULL;
/* Compact the list: after the decrement, short_ref_count - i is exactly
 * the number of trailing entries to shift down by one slot. */
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3174 * @return the removed picture or NULL if an error occurs
/* Remove a short-term reference identified by frame_num; ref_mask selects
 * which field(s) to unreference (0 = all). The list entry is deleted only
 * if the picture becomes entirely unreferenced. */
3176 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3177 MpegEncContext * const s = &h->s;
3181 if(s->avctx->debug&FF_DEBUG_MMCO)
3182 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3184 pic = find_short(h, frame_num, &i);
3186 if(unreference_pic(h, pic, ref_mask))
3187 remove_short_at_index(h, i);
3194 * Remove a picture from the long term reference list by its index in
3196 * @return the removed picture or NULL if an error occurs
/* ref_mask works as in remove_short(): only when the picture becomes
 * entirely unreferenced is the long_ref slot freed and the count updated. */
3198 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3201 pic= h->long_ref[i];
3203 if(unreference_pic(h, pic, ref_mask)){
3204 assert(h->long_ref[i]->long_ref == 1);
3205 h->long_ref[i]->long_ref= 0;
3206 h->long_ref[i]= NULL;
3207 h->long_ref_count--;
3215 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3217 static void print_short_term(H264Context *h) {
3219 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3220 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3221 for(i=0; i<h->short_ref_count; i++){
3222 Picture *pic= h->short_ref[i];
3223 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3229 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is set
 * (a NULL-slot guard between 3236 and 3238 is elided from this view). */
3231 static void print_long_term(H264Context *h) {
3233 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3235 for(i = 0; i < 16; i++){
3236 Picture *pic= h->long_ref[i];
3238 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3245 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO list (or the default sliding-window marking when
 * mmco_count == 0) to the DPB, per H.264 spec 8.2.5. Also handles the
 * second field of a complementary pair and recovers from reference-count
 * overruns in corrupt streams. Several branches/braces are elided from
 * this view; comments describe only visible code. */
3247 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3248 MpegEncContext * const s = &h->s;
3250 int current_ref_assigned=0;
3253 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3256 for(i=0; i<mmco_count; i++){
3257 int structure, frame_num;
3258 if(s->avctx->debug&FF_DEBUG_MMCO)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* For the short-term opcodes, resolve pic_num to frame_num/structure and
 * locate the target picture up front. */
3261 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3262 || mmco[i].opcode == MMCO_SHORT2LONG){
3263 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3264 pic = find_short(h, frame_num, &j);
3266 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3267 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3268 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3273 switch(mmco[i].opcode){
3274 case MMCO_SHORT2UNUSED:
3275 if(s->avctx->debug&FF_DEBUG_MMCO)
3276 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME keeps the *other* field referenced when only
 * one field is being unmarked. */
3277 remove_short(h, frame_num, structure ^ PICT_FRAME);
3279 case MMCO_SHORT2LONG:
/* Move a short-term ref to long-term slot long_arg, evicting any
 * different picture already in that slot. */
3280 if (h->long_ref[mmco[i].long_arg] != pic)
3281 remove_long(h, mmco[i].long_arg, 0);
3283 remove_short_at_index(h, j);
3284 h->long_ref[ mmco[i].long_arg ]= pic;
3285 if (h->long_ref[ mmco[i].long_arg ]){
3286 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3287 h->long_ref_count++;
3290 case MMCO_LONG2UNUSED:
3291 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3292 pic = h->long_ref[j];
3294 remove_long(h, j, structure ^ PICT_FRAME);
3295 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3299 // Comment below left from previous code as it is an interresting note.
3300 /* First field in pair is in short term list or
3301 * at a different long term index.
3302 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3303 * Report the problem and keep the pair where it is,
3304 * and mark this field valid.
/* (MMCO_LONG, opcode label elided) mark the current picture long-term. */
3307 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3308 remove_long(h, mmco[i].long_arg, 0);
3310 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3316 current_ref_assigned=1;
3318 case MMCO_SET_MAX_LONG:
3319 assert(mmco[i].long_arg <= 16);
3320 // just remove the long term which index is greater than new max
3321 for(j = mmco[i].long_arg; j<16; j++){
3322 remove_long(h, j, 0);
/* (MMCO_RESET, label elided) clear the entire DPB and reset POC/frame_num,
 * like an IDR. */
3326 while(h->short_ref_count){
3327 remove_short(h, h->short_ref[0]->frame_num, 0);
3329 for(j = 0; j < 16; j++) {
3330 remove_long(h, j, 0);
3332 s->current_picture_ptr->poc=
3333 s->current_picture_ptr->field_poc[0]=
3334 s->current_picture_ptr->field_poc[1]=
3338 s->current_picture_ptr->frame_num= 0;
3344 if (!current_ref_assigned) {
3345 /* Second field of complementary field pair; the first field of
3346 * which is already referenced. If short referenced, it
3347 * should be first entry in short_ref. If not, it must exist
3348 * in long_ref; trying to put it on the short list here is an
3349 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3351 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3352 /* Just mark the second field valid */
3353 s->current_picture_ptr->reference = PICT_FRAME;
3354 } else if (s->current_picture_ptr->long_ref) {
3355 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3356 "assignment for second field "
3357 "in complementary field pair "
3358 "(first field is long term)\n");
/* Normal case: prepend the current picture to the short-term list
 * (sliding-window marking). */
3360 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3362 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3365 if(h->short_ref_count)
3366 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3368 h->short_ref[0]= s->current_picture_ptr;
3369 h->short_ref_count++;
3370 s->current_picture_ptr->reference |= s->picture_structure;
3374 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3376 /* We have too many reference frames, probably due to corrupted
3377 * stream. Need to discard one frame. Prevents overrun of the
3378 * short_ref and long_ref buffers.
3380 av_log(h->s.avctx, AV_LOG_ERROR,
3381 "number of reference frames exceeds max (probably "
3382 "corrupt input), discarding one\n");
3384 if (h->long_ref_count && !h->short_ref_count) {
3385 for (i = 0; i < 16; ++i)
3390 remove_long(h, i, 0);
/* Otherwise drop the oldest short-term reference. */
3392 pic = h->short_ref[h->short_ref_count - 1];
3393 remove_short(h, pic->frame_num, 0);
3397 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * For IDR slices this synthesizes the equivalent MMCO ops; otherwise it
 * reads the explicit adaptive-marking list, or builds a sliding-window
 * eviction op when the DPB is full. Validation errors are logged and
 * (in elided lines) cause an error return.
 */
3402 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3403 MpegEncContext * const s = &h->s;
3407 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag, mapped to broken_link (0 or -1). */
3408 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag path (condition elided): mark current pic
 * as long-term index 0. */
3410 h->mmco[0].opcode= MMCO_LONG;
3411 h->mmco[0].long_arg= 0;
3415 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3416 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3417 MMCOOpcode opcode= get_ue_golomb_31(gb);
3419 h->mmco[i].opcode= opcode;
3420 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, modulo max_pic_num. */
3421 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3422 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3423 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3427 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3428 unsigned int long_arg= get_ue_golomb_31(gb);
/* In field coding LONG2UNUSED addresses field pic nums, so up to 31. */
3429 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3430 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3433 h->mmco[i].long_arg= long_arg;
3436 if(opcode > (unsigned)MMCO_LONG){
3437 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3440 if(opcode == MMCO_END)
/* Implicit sliding window: DPB full -> synthesize removal of the oldest
 * short-term reference (both fields when field-coded). */
3445 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3447 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3448 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3449 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3450 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3452 if (FIELD_PICTURE) {
3453 h->mmco[0].short_pic_num *= 2;
3454 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3455 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) for the current picture,
 * per H.264 spec 8.2.1, handling all three poc_type modes:
 *   0 - explicit lsb/msb wrap tracking,
 *   1 - predicted from a per-SPS expected delta cycle,
 *   2 - derived directly from frame_num.
 * Writes field_poc[0/1] of the current picture (respecting field coding)
 * and cur->poc as the minimum of the two field POCs.
 */
3465 static int init_poc(H264Context *h){
3466 MpegEncContext * const s = &h->s;
3467 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3469 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> advance the offset by one full frame_num cycle. */
3471 h->frame_num_offset= h->prev_frame_num_offset;
3472 if(h->frame_num < h->prev_frame_num)
3473 h->frame_num_offset += max_frame_num;
3475 if(h->sps.poc_type==0){
3476 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Detect lsb wrap-around in either direction and adjust the msb. */
3478 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3479 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3480 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3481 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3483 h->poc_msb = h->prev_poc_msb;
3484 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3486 field_poc[1] = h->poc_msb + h->poc_lsb;
3487 if(s->picture_structure == PICT_FRAME)
3488 field_poc[1] += h->delta_poc_bottom;
3489 }else if(h->sps.poc_type==1){
3490 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3493 if(h->sps.poc_cycle_length != 0)
3494 abs_frame_num = h->frame_num_offset + h->frame_num;
3498 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3501 expected_delta_per_poc_cycle = 0;
3502 for(i=0; i < h->sps.poc_cycle_length; i++)
3503 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
/* expectedpoc = full cycles plus partial offsets within the current cycle. */
3505 if(abs_frame_num > 0){
3506 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3507 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3509 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3510 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3511 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3515 if(h->nal_ref_idc == 0)
3516 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3518 field_poc[0] = expectedpoc + h->delta_poc[0];
3519 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3521 if(s->picture_structure == PICT_FRAME)
3522 field_poc[1] += h->delta_poc[1];
/* poc_type == 2: POC follows decoding order directly. */
3524 int poc= 2*(h->frame_num_offset + h->frame_num);
/* Only store the POC for the field(s) actually being decoded. */
3533 if(s->picture_structure != PICT_BOTTOM_FIELD)
3534 s->current_picture_ptr->field_poc[0]= field_poc[0];
3535 if(s->picture_structure != PICT_TOP_FIELD)
3536 s->current_picture_ptr->field_poc[1]= field_poc[1];
3537 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3544 * initialize scan tables
/* Build the per-context zigzag/field scan tables. When an optimized IDCT
 * with a permuted coefficient layout is in use (dsp function differs from
 * the plain C version), the scan entries are permuted to match via T();
 * otherwise the canonical tables are copied as-is. The *_q0 pointers give
 * lossless (qp==0 transform-bypass) code unpermuted tables. */
3546 static void init_scan_tables(H264Context *h){
3547 MpegEncContext * const s = &h->s;
3549 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3550 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3551 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3553 for(i=0; i<16; i++){
/* 4x4 permutation: swap the 2-bit row/column halves of the index. */
3554 #define T(x) (x>>2) | ((x<<2) & 0xF)
3555 h->zigzag_scan[i] = T(zigzag_scan[i]);
3556 h-> field_scan[i] = T( field_scan[i]);
3560 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3561 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3562 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3563 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3564 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3566 for(i=0; i<64; i++){
/* 8x8 permutation: swap the 3-bit row/column halves of the index. */
3567 #define T(x) (x>>3) | ((x&7)<<3)
3568 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3569 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3570 h->field_scan8x8[i] = T(field_scan8x8[i]);
3571 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3575 if(h->sps.transform_bypass){ //FIXME same ugly
/* Bypass blocks skip the IDCT, so they must use the unpermuted tables. */
3576 h->zigzag_scan_q0 = zigzag_scan;
3577 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3578 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3579 h->field_scan_q0 = field_scan;
3580 h->field_scan8x8_q0 = field_scan8x8;
3581 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3583 h->zigzag_scan_q0 = h->zigzag_scan;
3584 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3585 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3586 h->field_scan_q0 = h->field_scan;
3587 h->field_scan8x8_q0 = h->field_scan8x8;
3588 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3593 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-picture state a slice-thread worker needs:
 * current picture, line sizes, POC/frame_num prediction state, reference
 * lists and dequant tables. Pointer members are shared, not duplicated —
 * workers must treat them as read-only. */
3595 static void clone_slice(H264Context *dst, H264Context *src)
3597 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3598 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3599 dst->s.current_picture = src->s.current_picture;
3600 dst->s.linesize = src->s.linesize;
3601 dst->s.uvlinesize = src->s.uvlinesize;
3602 dst->s.first_field = src->s.first_field;
3604 dst->prev_poc_msb = src->prev_poc_msb;
3605 dst->prev_poc_lsb = src->prev_poc_lsb;
3606 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3607 dst->prev_frame_num = src->prev_frame_num;
3608 dst->short_ref_count = src->short_ref_count;
3610 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3611 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3612 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3613 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3615 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3616 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3620 * decodes a slice header.
3621 * This will also call MPV_common_init() and frame_start() as needed.
3623 * @param h h264context
3624 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3626 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/* NOTE(review): many lines (else branches, error returns, braces) are
 * elided from this view; comments describe only the visible statements. */
3628 static int decode_slice_header(H264Context *h, H264Context *h0){
3629 MpegEncContext * const s = &h->s;
3630 MpegEncContext * const s0 = &h0->s;
3631 unsigned int first_mb_in_slice;
3632 unsigned int pps_id;
3633 int num_ref_idx_active_override_flag;
3634 unsigned int slice_type, tmp, i, j;
3635 int default_ref_list_done = 0;
3636 int last_pic_structure;
/* Non-reference pictures may be dropped without affecting later decode. */
3638 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use cheaper 2-tap qpel interpolation for droppable frames. */
3640 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3641 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3642 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3644 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3645 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3648 first_mb_in_slice= get_ue_golomb(&s->gb);
3650 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3651 h0->current_slice = 0;
3652 if (!s0->first_field)
3653 s->current_picture_ptr= NULL;
3656 slice_type= get_ue_golomb_31(&s->gb);
3658 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed for the whole picture" per the spec. */
3663 h->slice_type_fixed=1;
3665 h->slice_type_fixed=0;
3667 slice_type= golomb_to_pict_type[ slice_type ];
3668 if (slice_type == FF_I_TYPE
3669 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3670 default_ref_list_done = 1;
3672 h->slice_type= slice_type;
3673 h->slice_type_nos= slice_type & 3;
3675 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3676 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3677 av_log(h->s.avctx, AV_LOG_ERROR,
3678 "B picture before any references, skipping\n");
/* Resolve and validate the referenced PPS and its SPS. */
3682 pps_id= get_ue_golomb(&s->gb);
3683 if(pps_id>=MAX_PPS_COUNT){
3684 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3687 if(!h0->pps_buffers[pps_id]) {
3688 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3691 h->pps= *h0->pps_buffers[pps_id];
3693 if(!h0->sps_buffers[h->pps.sps_id]) {
3694 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3697 h->sps = *h0->sps_buffers[h->pps.sps_id];
3699 if(h == h0 && h->dequant_coeff_pps != pps_id){
3700 h->dequant_coeff_pps = pps_id;
3701 init_dequant_tables(h);
/* Derive coded dimensions (including cropping) from the SPS. */
3704 s->mb_width= h->sps.mb_width;
3705 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3707 h->b_stride= s->mb_width*4;
3708 h->b8_stride= s->mb_width*2;
3710 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3711 if(h->sps.frame_mbs_only_flag)
3712 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3714 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3716 if (s->context_initialized
3717 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3719 return -1; // width / height changed during parallelized decoding
3721 flush_dpb(s->avctx);
3724 if (!s->context_initialized) {
3726 return -1; // we cant (re-)initialize context during parallel decoding
3727 if (MPV_common_init(s) < 0)
3731 init_scan_tables(h);
/* Set up per-thread H264Contexts: copy the MpegEncContext part, zero the
 * rest, and initialize each context's scan tables. */
3734 for(i = 1; i < s->avctx->thread_count; i++) {
3736 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3737 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3738 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3741 init_scan_tables(c);
3745 for(i = 0; i < s->avctx->thread_count; i++)
3746 if(context_init(h->thread_context[i]) < 0)
3749 s->avctx->width = s->width;
3750 s->avctx->height = s->height;
3751 s->avctx->sample_aspect_ratio= h->sps.sar;
3752 if(!s->avctx->sample_aspect_ratio.den)
3753 s->avctx->sample_aspect_ratio.den = 1;
3755 if(h->sps.timing_info_present_flag){
3756 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* Work around old x264 builds that wrote half the correct time scale. */
3757 if(h->x264_build > 0 && h->x264_build < 44)
3758 s->avctx->time_base.den *= 2;
3759 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3760 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3764 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* Determine frame/field picture structure for this slice. */
3767 h->mb_aff_frame = 0;
3768 last_pic_structure = s0->picture_structure;
3769 if(h->sps.frame_mbs_only_flag){
3770 s->picture_structure= PICT_FRAME;
3772 if(get_bits1(&s->gb)) { //field_pic_flag
3773 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3775 s->picture_structure= PICT_FRAME;
3776 h->mb_aff_frame = h->sps.mb_aff;
3779 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3781 if(h0->current_slice == 0){
/* Conceal frame_num gaps by synthesizing the missing reference frames. */
3782 while(h->frame_num != h->prev_frame_num &&
3783 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3784 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3786 h->prev_frame_num++;
3787 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3788 s->current_picture_ptr->frame_num= h->prev_frame_num;
3789 execute_ref_pic_marking(h, NULL, 0);
3792 /* See if we have a decoded first field looking for a pair... */
3793 if (s0->first_field) {
3794 assert(s0->current_picture_ptr);
3795 assert(s0->current_picture_ptr->data[0]);
3796 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3798 /* figure out if we have a complementary field pair */
3799 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3801 * Previous field is unmatched. Don't display it, but let it
3802 * remain for reference if marked as such.
3804 s0->current_picture_ptr = NULL;
3805 s0->first_field = FIELD_PICTURE;
3808 if (h->nal_ref_idc &&
3809 s0->current_picture_ptr->reference &&
3810 s0->current_picture_ptr->frame_num != h->frame_num) {
3812 * This and previous field were reference, but had
3813 * different frame_nums. Consider this field first in
3814 * pair. Throw away previous field except for reference
3817 s0->first_field = 1;
3818 s0->current_picture_ptr = NULL;
3821 /* Second field in complementary pair */
3822 s0->first_field = 0;
3827 /* Frame or first field in a potentially complementary pair */
3828 assert(!s0->current_picture_ptr);
3829 s0->first_field = FIELD_PICTURE;
3832 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3833 s0->first_field = 0;
3840 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3842 assert(s->mb_num == s->mb_width * s->mb_height);
3843 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3844 first_mb_in_slice >= s->mb_num){
3845 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3848 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3849 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3850 if (s->picture_structure == PICT_BOTTOM_FIELD)
3851 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3852 assert(s->mb_y < s->mb_height);
/* Fields use pic nums in field units (doubled range). */
3854 if(s->picture_structure==PICT_FRAME){
3855 h->curr_pic_num= h->frame_num;
3856 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3858 h->curr_pic_num= 2*h->frame_num + 1;
3859 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3862 if(h->nal_unit_type == NAL_IDR_SLICE){
3863 get_ue_golomb(&s->gb); /* idr_pic_id */
/* Read POC syntax elements according to the SPS poc_type. */
3866 if(h->sps.poc_type==0){
3867 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3869 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3870 h->delta_poc_bottom= get_se_golomb(&s->gb);
3874 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3875 h->delta_poc[0]= get_se_golomb(&s->gb);
3877 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3878 h->delta_poc[1]= get_se_golomb(&s->gb);
3883 if(h->pps.redundant_pic_cnt_present){
3884 h->redundant_pic_count= get_ue_golomb(&s->gb);
3887 //set defaults, might be overridden a few lines later
3888 h->ref_count[0]= h->pps.ref_count[0];
3889 h->ref_count[1]= h->pps.ref_count[1];
3891 if(h->slice_type_nos != FF_I_TYPE){
3892 if(h->slice_type_nos == FF_B_TYPE){
3893 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3895 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3897 if(num_ref_idx_active_override_flag){
3898 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3899 if(h->slice_type_nos==FF_B_TYPE)
3900 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* Unsigned-style overflow check: also catches ref_count == 0 via wrap. */
3902 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3903 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3904 h->ref_count[0]= h->ref_count[1]= 1;
3908 if(h->slice_type_nos == FF_B_TYPE)
/* Build / reorder reference lists for this slice. */
3915 if(!default_ref_list_done){
3916 fill_default_ref_list(h);
3919 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3922 if(h->slice_type_nos!=FF_I_TYPE){
3923 s->last_picture_ptr= &h->ref_list[0][0];
3924 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3926 if(h->slice_type_nos==FF_B_TYPE){
3927 s->next_picture_ptr= &h->ref_list[1][0];
3928 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* Weighted prediction: explicit (P, or B with idc==1) vs implicit (idc==2). */
3931 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3932 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3933 pred_weight_table(h);
3934 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3935 implicit_weight_table(h);
3940 decode_ref_pic_marking(h0, &s->gb);
3943 fill_mbaff_ref_list(h);
3945 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3946 direct_dist_scale_factor(h);
3947 direct_ref_list_init(h);
3949 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3950 tmp = get_ue_golomb_31(&s->gb);
3952 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3955 h->cabac_init_idc= tmp;
3958 h->last_qscale_diff = 0;
3959 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3961 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3965 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3966 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3967 //FIXME qscale / qp ... stuff
3968 if(h->slice_type == FF_SP_TYPE){
3969 get_bits1(&s->gb); /* sp_for_switch_flag */
3971 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3972 get_se_golomb(&s->gb); /* slice_qs_delta */
/* Deblocking filter parameters; internal encoding: 0 = on, 1 = off,
 * 2 = on but not across slice boundaries (inverse of the bitstream idc
 * for values < 2, hence the XOR below). */
3975 h->deblocking_filter = 1;
3976 h->slice_alpha_c0_offset = 0;
3977 h->slice_beta_offset = 0;
3978 if( h->pps.deblocking_filter_parameters_present ) {
3979 tmp= get_ue_golomb_31(&s->gb);
3981 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3984 h->deblocking_filter= tmp;
3985 if(h->deblocking_filter < 2)
3986 h->deblocking_filter^= 1; // 1<->0
3988 if( h->deblocking_filter ) {
3989 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3990 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3994 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3995 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3996 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3997 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3998 h->deblocking_filter= 0;
/* Cross-slice deblocking cannot run in parallel slice threads. */
4000 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4001 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4002 /* Cheat slightly for speed:
4003 Do not bother to deblock across slices. */
4004 h->deblocking_filter = 2;
4006 h0->max_contexts = 1;
4007 if(!h0->single_decode_warning) {
4008 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4009 h0->single_decode_warning = 1;
4012 return 1; // deblocking switched inside frame
/* NOTE(review): the '?' below is a literal placeholder — in FFmpeg this
 * FMO (slice groups) branch lives inside a disabled #if 0 block, which
 * appears to be elided from this view; confirm against the full source. */
4017 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4018 slice_group_change_cycle= get_bits(&s->gb, ?);
4021 h0->last_slice_type = slice_type;
4022 h->slice_num = ++h0->current_slice;
4023 if(h->slice_num >= MAX_SLICES){
4024 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* Precompute ref_list index -> frame id mapping used by loop filtering. */
4028 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4032 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4033 +(h->ref_list[j][i].reference&3);
4036 for(i=16; i<48; i++)
4037 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4038 +(h->ref_list[j][i].reference&3);
4041 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4042 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4044 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4045 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4047 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4049 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4050 pps_id, h->frame_num,
4051 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4052 h->ref_count[0], h->ref_count[1],
4054 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4056 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4057 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/* Read a CAVLC level_prefix: the count of leading zero bits before the
 * terminating 1. Implemented by peeking the 32-bit cache, locating the
 * highest set bit with av_log2(), then skipping prefix+1 bits.
 * (Return statement is elided from this view; presumably returns log-1.) */
4067 static inline int get_level_prefix(GetBitContext *gb){
4071 OPEN_READER(re, gb);
4072 UPDATE_CACHE(re, gb);
4073 buf=GET_CACHE(re, gb);
/* log = number of bits up to and including the first 1 bit. */
4075 log= 32 - av_log2(buf);
4077 print_bin(buf>>(32-log), log);
4078 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4081 LAST_SKIP_BITS(re, gb, log);
4082 CLOSE_READER(re, gb);
/* Return non-zero if the 8x8 transform may be used for the current MB:
 * true iff none of the four sub_mb_type entries (tested in one 64-bit
 * load across the 16-bit lanes) contains a sub-8x8 partition type.
 * With direct_8x8_inference, DIRECT sub-blocks do not forbid it.
 * NOTE(review): the uint64_t cast over sub_mb_type relies on alignment
 * and type-punning guarantees established elsewhere in this file. */
4087 static inline int get_dct8x8_allowed(H264Context *h){
4088 if(h->sps.direct_8x8_inference_flag)
4089 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4091 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4095 * decodes a residual block.
4096 * @param n block index
4097 * @param scantable scantable
4098 * @param max_coeff number of coefficients in the block
4099 * @return <0 if an error occurred
// NOTE(review): this excerpt has interior lines elided (gaps in the
// embedded numbering) — e.g. closing braces, "else" lines, and some
// declarations (level[], prefix, mask, suffix_length in the second loop)
// are not visible.  Comments below describe only the visible code.
4101 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4102 MpegEncContext * const s = &h->s;
// Maps the predicted nonzero-coefficient count (0..16) to one of the
// four coeff_token VLC tables.
4103 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4105 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4107 //FIXME put trailing_onex into the context
// --- coeff_token: selects VLC table by block type / predicted count ---
4109 if(n == CHROMA_DC_BLOCK_INDEX){
4110 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4111 total_coeff= coeff_token>>2;
4113 if(n == LUMA_DC_BLOCK_INDEX){
4114 total_coeff= pred_non_zero_count(h, 0);
4115 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4116 total_coeff= coeff_token>>2;
4118 total_coeff= pred_non_zero_count(h, n);
4119 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4120 total_coeff= coeff_token>>2;
4121 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4125 //FIXME set last_non_zero?
4129 if(total_coeff > (unsigned)max_coeff) {
4130 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// coeff_token packs trailing_ones in its low 2 bits.
4134 trailing_ones= coeff_token&3;
4135 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4136 assert(total_coeff<=16);
// --- trailing ones: up to 3 sign bits, peeked then partially consumed ---
4138 i = show_bits(gb, 3);
4139 skip_bits(gb, trailing_ones);
// Each expression maps a sign bit to +1/-1 (bit set => -1).
4140 level[0] = 1-((i&4)>>1);
4141 level[1] = 1-((i&2) );
4142 level[2] = 1-((i&1)<<1);
// --- remaining levels ---
4144 if(trailing_ones<total_coeff) {
4146 int suffix_length = total_coeff > 10 && trailing_ones < 3;
// Fast path: look up prefix+suffix in a precomputed LEVEL_TAB_BITS table;
// level_code >= 100 marks an escape that must be decoded the slow way.
4147 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4148 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4150 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4151 if(level_code >= 100){
4152 prefix= level_code - 100;
4153 if(prefix == LEVEL_TAB_BITS)
4154 prefix += get_level_prefix(gb);
4156 //first coefficient has suffix_length equal to 0 or 1
4157 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4159 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4161 level_code= (prefix<<suffix_length); //part
4162 }else if(prefix==14){
4164 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4166 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: long escape; suffix carries prefix-3 bits.
4168 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4169 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4171 level_code += (1<<(prefix-3))-4096;
4174 if(trailing_ones < 3) level_code += 2;
// Branch-free zigzag-to-signed mapping: even codes negative, odd positive.
4177 mask= -(level_code&1);
4178 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4180 if(trailing_ones < 3) level_code += (level_code>>31)|1;
// Promote suffix_length when the decoded magnitude is large enough.
4183 if(level_code + 3U > 6U)
4185 level[trailing_ones]= level_code;
4188 //remaining coefficients have suffix_length > 0
4189 for(i=trailing_ones+1;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented.
4190 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4191 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4192 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4194 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4195 if(level_code >= 100){
4196 prefix= level_code - 100;
4197 if(prefix == LEVEL_TAB_BITS){
4198 prefix += get_level_prefix(gb);
4201 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4203 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4205 level_code += (1<<(prefix-3))-4096;
4207 mask= -(level_code&1);
4208 level_code= (((2+level_code)>>1) ^ mask) - mask;
4210 level[i]= level_code;
4212 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: omitted when the block is already full ---
4217 if(total_coeff == max_coeff)
4220 if(n == CHROMA_DC_BLOCK_INDEX)
4221 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4223 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels into block[] in scan order, reading run_before ---
// Two near-identical loops: the first (qmul==NULL path, presumably) stores
// raw levels, the second applies dequantization — TODO confirm the guard
// that selects between them sits on an elided line.
4226 coeff_num = zeros_left + total_coeff - 1;
4227 j = scantable[coeff_num];
4229 block[j] = level[0];
4230 for(i=1;i<total_coeff;i++) {
4233 else if(zeros_left < 7){
4234 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4236 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4238 zeros_left -= run_before;
4239 coeff_num -= 1 + run_before;
4240 j= scantable[ coeff_num ];
4245 block[j] = (level[0] * qmul[j] + 32)>>6;
4246 for(i=1;i<total_coeff;i++) {
4249 else if(zeros_left < 7){
4250 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4252 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4254 zeros_left -= run_before;
4255 coeff_num -= 1 + run_before;
4256 j= scantable[ coeff_num ];
4258 block[j]= (level[i] * qmul[j] + 32)>>6;
// Bitstream error: more zeros consumed than were signalled.
4263 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair from the
 * left neighbour (if in the same slice), else from the top neighbour.
 * NOTE(review): the fallback value when neither neighbour is available
 * is on an elided line (original 4277).
 */
4270 static void predict_field_decoding_flag(H264Context *h){
4271 MpegEncContext * const s = &h->s;
4272 const int mb_xy= h->mb_xy;
// Chained conditional: left neighbour first, then the MB above.
4273 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4274 ? s->current_picture.mb_type[mb_xy-1]
4275 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4276 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4278 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4282 * decodes a P_SKIP or B_SKIP macroblock
// NOTE(review): interior lines are elided here (e.g. the declaration and
// initialization of mb_type/mx/my, and several closing braces).
4284 static void decode_mb_skip(H264Context *h){
4285 MpegEncContext * const s = &h->s;
4286 const int mb_xy= h->mb_xy;
// A skipped MB has no residual: clear the nonzero-count bookkeeping.
4289 memset(h->non_zero_count[mb_xy], 0, 16);
4290 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4293 mb_type|= MB_TYPE_INTERLACED;
4295 if( h->slice_type_nos == FF_B_TYPE )
4297 // just for fill_caches. pred_direct_motion will set the real mb_type
4298 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4300 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4301 pred_direct_motion(h, &mb_type);
4302 mb_type|= MB_TYPE_SKIP;
// P_SKIP path: 16x16 with motion predicted from neighbours, ref index 0.
4307 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4309 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4310 pred_pskip_motion(h, &mx, &my);
4311 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4312 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit per-MB state for later stages (deblocking, neighbours).
4315 write_back_motion(h, mb_type);
4316 s->current_picture.mb_type[mb_xy]= mb_type;
4317 s->current_picture.qscale_table[mb_xy]= s->qscale;
4318 h->slice_table[ mb_xy ]= h->slice_num;
4319 h->prev_mb_skipped= 1;
4323 * decodes a macroblock
4324 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
// NOTE(review): this excerpt omits many interior lines (gaps in the
// embedded numbering): several declarations (mb_xy, i, x, di, pred_mode,
// val, dquant, nnz, ...), "else" branches, early returns, closing braces
// and the decode_intra_mb / decode_mb_skip call sites are not visible.
// Comments below annotate only the visible code.
4326 static int decode_mb_cavlc(H264Context *h){
4327 MpegEncContext * const s = &h->s;
4329 int partition_count;
4330 unsigned int mb_type, cbp;
4331 int dct8x8_allowed= h->pps.transform_8x8_mode;
4333 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4335 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4336 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run handling (P/B slices only) ---
4338 if(h->slice_type_nos != FF_I_TYPE){
4339 if(s->mb_skip_run==-1)
4340 s->mb_skip_run= get_ue_golomb(&s->gb);
4342 if (s->mb_skip_run--) {
// In MBAFF frames the field flag for a skipped top MB is either read
// (if this is the last skip) or predicted from neighbours.
4343 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4344 if(s->mb_skip_run==0)
4345 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4347 predict_field_decoding_flag(h);
4354 if( (s->mb_y&1) == 0 )
4355 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4358 h->prev_mb_skipped= 0;
// --- mb_type: interpretation depends on slice type ---
4360 mb_type= get_ue_golomb(&s->gb);
4361 if(h->slice_type_nos == FF_B_TYPE){
4363 partition_count= b_mb_type_info[mb_type].partition_count;
4364 mb_type= b_mb_type_info[mb_type].type;
// Large values fall through to the intra-MB table (offset applied on an
// elided line) — hence the goto.
4367 goto decode_intra_mb;
4369 }else if(h->slice_type_nos == FF_P_TYPE){
4371 partition_count= p_mb_type_info[mb_type].partition_count;
4372 mb_type= p_mb_type_info[mb_type].type;
4375 goto decode_intra_mb;
4378 assert(h->slice_type_nos == FF_I_TYPE);
4379 if(h->slice_type == FF_SI_TYPE && mb_type)
4383 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4387 cbp= i_mb_type_info[mb_type].cbp;
4388 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4389 mb_type= i_mb_type_info[mb_type].type;
4393 mb_type |= MB_TYPE_INTERLACED;
4395 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, byte-aligned ---
4397 if(IS_INTRA_PCM(mb_type)){
4400 // We assume these blocks are very rare so we do not optimize it.
4401 align_get_bits(&s->gb);
4403 // The pixels are stored in the same order as levels in h->mb array.
4404 for(x=0; x < (CHROMA ? 384 : 256); x++){
4405 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4408 // In deblocking, the quantizer is 0
4409 s->current_picture.qscale_table[mb_xy]= 0;
4410 // All coeffs are present
4411 memset(h->non_zero_count[mb_xy], 16, 16);
4413 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address twice as many refs (doubled here, halved at end).
4418 h->ref_count[0] <<= 1;
4419 h->ref_count[1] <<= 1;
4422 fill_caches(h, mb_type, 0);
// --- intra prediction mode parsing ---
4425 if(IS_INTRA(mb_type)){
4427 // init_top_left_availability(h);
4428 if(IS_INTRA4x4(mb_type)){
4431 if(dct8x8_allowed && get_bits1(&s->gb)){
4432 mb_type |= MB_TYPE_8x8DCT;
4436 // fill_intra4x4_pred_table(h);
4437 for(i=0; i<16; i+=di){
4438 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag==0 => 3-bit remaining mode with the
// "skip the predicted value" adjustment.
4440 if(!get_bits1(&s->gb)){
4441 const int rem_mode= get_bits(&s->gb, 3);
4442 mode = rem_mode + (rem_mode >= mode);
4446 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4448 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4450 write_back_intra_pred_mode(h);
4451 if( check_intra4x4_pred_mode(h) < 0)
4454 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4455 if(h->intra16x16_pred_mode < 0)
4459 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4462 h->chroma_pred_mode= pred_mode;
// --- 8x8 sub-macroblock partitions ---
4464 }else if(partition_count==4){
4465 int i, j, sub_partition_count[4], list, ref[2][4];
4467 if(h->slice_type_nos == FF_B_TYPE){
4469 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4470 if(h->sub_mb_type[i] >=13){
4471 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4474 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4475 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4477 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4478 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4479 pred_direct_motion(h, &mb_type);
// Mark direct-filled corners so pred_motion won't use them as refs.
4480 h->ref_cache[0][scan8[4]] =
4481 h->ref_cache[1][scan8[4]] =
4482 h->ref_cache[0][scan8[12]] =
4483 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4486 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4488 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4489 if(h->sub_mb_type[i] >=4){
4490 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4493 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4494 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 partition, per list.
4498 for(list=0; list<h->list_count; list++){
4499 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4501 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4502 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4503 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4505 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4517 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors per sub-partition.
4519 for(list=0; list<h->list_count; list++){
4521 if(IS_DIRECT(h->sub_mb_type[i])) {
4522 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4525 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4526 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4528 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4529 const int sub_mb_type= h->sub_mb_type[i];
4530 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4531 for(j=0; j<sub_partition_count[i]; j++){
4533 const int index= 4*i + block_width*j;
4534 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4535 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4536 mx += get_se_golomb(&s->gb);
4537 my += get_se_golomb(&s->gb);
4538 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV over the cache cells this partition shape covers.
4540 if(IS_SUB_8X8(sub_mb_type)){
4542 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4544 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4545 }else if(IS_SUB_8X4(sub_mb_type)){
4546 mv_cache[ 1 ][0]= mx;
4547 mv_cache[ 1 ][1]= my;
4548 }else if(IS_SUB_4X8(sub_mb_type)){
4549 mv_cache[ 8 ][0]= mx;
4550 mv_cache[ 8 ][1]= my;
4552 mv_cache[ 0 ][0]= mx;
4553 mv_cache[ 0 ][1]= my;
4556 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4562 }else if(IS_DIRECT(mb_type)){
4563 pred_direct_motion(h, &mb_type);
4564 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- whole-MB inter partitions: 16x16, 16x8, 8x16 ---
4566 int list, mx, my, i;
4567 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4568 if(IS_16X16(mb_type)){
4569 for(list=0; list<h->list_count; list++){
4571 if(IS_DIR(mb_type, 0, list)){
4572 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4573 if(val >= h->ref_count[list]){
4574 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4578 val= LIST_NOT_USED&0xFF;
4579 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4581 for(list=0; list<h->list_count; list++){
4583 if(IS_DIR(mb_type, 0, list)){
4584 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4585 mx += get_se_golomb(&s->gb);
4586 my += get_se_golomb(&s->gb);
4587 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4589 val= pack16to32(mx,my);
4592 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4595 else if(IS_16X8(mb_type)){
4596 for(list=0; list<h->list_count; list++){
4599 if(IS_DIR(mb_type, i, list)){
4600 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4601 if(val >= h->ref_count[list]){
4602 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4606 val= LIST_NOT_USED&0xFF;
4607 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4610 for(list=0; list<h->list_count; list++){
4613 if(IS_DIR(mb_type, i, list)){
4614 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4615 mx += get_se_golomb(&s->gb);
4616 my += get_se_golomb(&s->gb);
4617 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4619 val= pack16to32(mx,my);
4622 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4626 assert(IS_8X16(mb_type));
4627 for(list=0; list<h->list_count; list++){
4630 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4631 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4632 if(val >= h->ref_count[list]){
4633 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4637 val= LIST_NOT_USED&0xFF;
4638 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4641 for(list=0; list<h->list_count; list++){
4644 if(IS_DIR(mb_type, i, list)){
4645 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4646 mx += get_se_golomb(&s->gb);
4647 my += get_se_golomb(&s->gb);
4648 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4650 val= pack16to32(mx,my);
4653 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4659 if(IS_INTER(mb_type))
4660 write_back_motion(h, mb_type);
// --- coded_block_pattern (not present for Intra_16x16, which carries
// its cbp inside mb_type) ---
4662 if(!IS_INTRA16x16(mb_type)){
4663 cbp= get_ue_golomb(&s->gb);
4665 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4670 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4671 else cbp= golomb_to_inter_cbp [cbp];
4673 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4674 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag, only if luma residual exists for inter MBs.
4679 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4680 if(get_bits1(&s->gb)){
4681 mb_type |= MB_TYPE_8x8DCT;
4682 h->cbp_table[mb_xy]= cbp;
4685 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residual decoding ---
4687 if(cbp || IS_INTRA16x16(mb_type)){
4688 int i8x8, i4x4, chroma_idx;
4690 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4691 const uint8_t *scan, *scan8x8, *dc_scan;
4693 // fill_non_zero_count_cache(h);
// Scan tables depend on field/frame coding; the _q0 variants are for
// qscale==0 (see their selection by s->qscale).
4695 if(IS_INTERLACED(mb_type)){
4696 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4697 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4698 dc_scan= luma_dc_field_scan;
4700 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4701 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4702 dc_scan= luma_dc_zigzag_scan;
4705 dquant= get_se_golomb(&s->gb);
4707 if( dquant > 25 || dquant < -26 ){
4708 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4712 s->qscale += dquant;
4713 if(((unsigned)s->qscale) > 51){
4714 if(s->qscale<0) s->qscale+= 52;
4715 else s->qscale-= 52;
4718 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4719 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4720 if(IS_INTRA16x16(mb_type)){
4721 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4722 return -1; //FIXME continue if partitioned and other return -1 too
4725 assert((cbp&15) == 0 || (cbp&15) == 15);
4728 for(i8x8=0; i8x8<4; i8x8++){
4729 for(i4x4=0; i4x4<4; i4x4++){
4730 const int index= i4x4 + 4*i8x8;
// scan+1 / 15 coeffs: AC block, DC handled separately above.
4731 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4737 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4740 for(i8x8=0; i8x8<4; i8x8++){
4741 if(cbp & (1<<i8x8)){
4742 if(IS_8x8DCT(mb_type)){
4743 DCTELEM *buf = &h->mb[64*i8x8];
4745 for(i4x4=0; i4x4<4; i4x4++){
4746 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4747 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4750 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4751 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4753 for(i4x4=0; i4x4<4; i4x4++){
4754 const int index= i4x4 + 4*i8x8;
4756 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4762 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4763 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (2x2) then chroma AC blocks.
4769 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4770 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4776 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4777 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4778 for(i4x4=0; i4x4<4; i4x4++){
4779 const int index= 16 + 4*chroma_idx + i4x4;
4780 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4786 uint8_t * const nnz= &h->non_zero_count_cache[0];
4787 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4788 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4791 uint8_t * const nnz= &h->non_zero_count_cache[0];
4792 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4793 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4794 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4796 s->current_picture.qscale_table[mb_xy]= s->qscale;
4797 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling done near the top.
4800 h->ref_count[0] >>= 1;
4801 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag with CABAC: the context (0..2) counts
 * how many of the left / above MB-pair neighbours are field-coded.
 * NOTE(review): the ctx increments inside the two ifs are on elided lines.
 */
4807 static int decode_cabac_field_decoding_flag(H264Context *h) {
4808 MpegEncContext * const s = &h->s;
4809 const int mb_x = s->mb_x;
// Address the top MB of the current MB pair.
4810 const int mb_y = s->mb_y & ~1;
4811 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4812 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4814 unsigned int ctx = 0;
4816 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4819 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4823 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I_16x16
 * variant (prediction mode, cbp_luma, cbp_chroma packed into the value).
 * @param ctx_base offset into cabac_state for this slice type's contexts
 * @param intra_slice nonzero in I slices (neighbour-based ctx is used)
 * NOTE(review): declarations of ctx/mb_type and some braces are elided.
 */
4826 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4827 uint8_t *state= &h->cabac_state[ctx_base];
4831 MpegEncContext * const s = &h->s;
4832 const int mba_xy = h->left_mb_xy[0];
4833 const int mbb_xy = h->top_mb_xy;
// ctx grows for each non-I4x4 neighbour in the same slice.
4835 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4837 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4839 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4840 return 0; /* I4x4 */
4843 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4844 return 0; /* I4x4 */
4847 if( get_cabac_terminate( &h->cabac ) )
4848 return 25; /* PCM */
// Assemble the I16x16 mb_type index from the remaining bins.
4850 mb_type = 1; /* I16x16 */
4851 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4852 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4853 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4854 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4855 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type for a B slice with CABAC.
 * Returns 0 for B_Direct_16x16, 1..2 for B_L0/L1_16x16, 3..10 and
 * 12..21 for the two-ref 16x16/16x8/8x16 types, 11 for B_L1_L0_8x16,
 * 22 for B_8x8, and >=23 for intra types (delegated to
 * decode_cabac_intra_mb_type).
 * NOTE(review): declarations of ctx/bits and some braces are elided.
 */
4859 static int decode_cabac_mb_type_b( H264Context *h ) {
4860 MpegEncContext * const s = &h->s;
4862 const int mba_xy = h->left_mb_xy[0];
4863 const int mbb_xy = h->top_mb_xy;
4866 assert(h->slice_type_nos == FF_B_TYPE);
// ctx counts non-direct neighbours in the same slice.
4868 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4870 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4873 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4874 return 0; /* B_Direct_16x16 */
4876 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4877 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four more bins form a 4-bit code; special values escape below.
4880 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4881 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4882 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4883 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4885 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4886 else if( bits == 13 ) {
4887 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4888 } else if( bits == 14 )
4889 return 11; /* B_L1_L0_8x16 */
4890 else if( bits == 15 )
4891 return 22; /* B_8x8 */
// One extra bin extends the code for the remaining bi-pred types.
4893 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4894 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * Decodes mb_skip_flag with CABAC; the context (0..2, plus 13 offset for
 * B slices) counts non-skipped left/top neighbours in the same slice.
 * In MBAFF frames the neighbour addresses are adjusted so that the
 * neighbour with matching field/frame coding is used.
 * NOTE(review): declarations of mba_xy/mbb_xy/ctx and some braces are on
 * elided lines.
 */
4897 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4898 MpegEncContext * const s = &h->s;
4902 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4903 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4906 && h->slice_table[mba_xy] == h->slice_num
// Use the bottom MB of the left pair when its field/frame mode matches.
4907 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4908 mba_xy += s->mb_stride;
4910 mbb_xy = mb_xy - s->mb_stride;
4912 && h->slice_table[mbb_xy] == h->slice_num
4913 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4914 mbb_xy -= s->mb_stride;
4916 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4918 int mb_xy = h->mb_xy;
4920 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4923 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4925 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4928 if( h->slice_type_nos == FF_B_TYPE )
4930 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode with CABAC.
 * First bin (state 68) signals "use predicted mode"; otherwise three
 * bins (state 69) form rem_intra4x4_pred_mode, adjusted to skip the
 * predicted value.
 * NOTE(review): the mode declaration, the early return for the
 * predicted-mode case and the final return are on elided lines.
 */
4933 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4936 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// Three fixed-context bins, LSB first.
4939 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4940 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4941 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode (same trick as the CAVLC rem_mode path).
4943 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode with CABAC (truncated unary, max 3).
 * Context 0..2 counts neighbours with a nonzero chroma mode.
 * NOTE(review): the ctx declaration, the ctx increments and two return
 * statements are on elided lines.
 */
4949 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4950 const int mba_xy = h->left_mb_xy[0];
4951 const int mbb_xy = h->top_mb_xy;
4955 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4956 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4959 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4962 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Remaining bins of the truncated-unary code use context 64+3.
4965 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4967 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC.
 * Each 8x8 block's bin uses a context built from whether the
 * corresponding left/top neighbouring 8x8 blocks were coded; -1 when the
 * neighbour is in another slice (making the !(... & mask) tests pass).
 */
4973 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4974 int cbp_b, cbp_a, ctx, cbp = 0;
4976 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4977 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// One bin per 8x8 block; later blocks use bits already decoded into cbp.
4979 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4980 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4981 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4982 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4983 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4984 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4985 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4986 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma coded_block_pattern (0 = none, 1 = DC only,
 * 2 = DC+AC) with CABAC; contexts depend on the neighbours' chroma cbp
 * stored in bits 4..5 of left_cbp/top_cbp.
 * NOTE(review): declarations of cbp_a/cbp_b/ctx, a return 0 and the
 * ctx reset between the two bins are on elided lines.
 */
4989 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4993 cbp_a = (h->left_cbp>>4)&0x03;
4994 cbp_b = (h-> top_cbp>>4)&0x03;
// First bin: any chroma coefficients at all?
4997 if( cbp_a > 0 ) ctx++;
4998 if( cbp_b > 0 ) ctx += 2;
4999 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: AC coefficients too (neighbour had full chroma cbp)?
5003 if( cbp_a == 2 ) ctx++;
5004 if( cbp_b == 2 ) ctx += 2;
5005 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC: unary bin string mapped to a signed
 * value ((val+1)>>1 with alternating sign).  First-bin context depends
 * on whether the previous MB had a nonzero delta.
 * NOTE(review): the val declaration, the ctx updates inside the loop,
 * the error return for val > 102 and the even/odd sign test are elided.
 */
5007 static int decode_cabac_mb_dqp( H264Context *h) {
5008 int ctx= h->last_qscale_diff != 0;
5011 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5014 if(val > 102) //prevent infinite loop
// Map unary length to signed delta: +1,-1,+2,-2,...
5019 return (val + 1)>>1 ;
5021 return -((val + 1)>>1);
/**
 * Decodes a P-slice sub_mb_type (0..3) with CABAC from up to three bins
 * (states 21..23).  The returns selecting each value are on elided lines.
 */
5023 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5024 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5026 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5028 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (0..12) with CABAC.
 * 0 = B_Direct_8x8; 1..2 = B_L0/L1_8x8; 11..12 = B_L1/Bi_4x4; the middle
 * range is assembled from additional bins into `type`.
 * NOTE(review): the type declaration/initialization and the final
 * return are on elided lines.
 */
5032 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5034 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5035 return 0; /* B_Direct_8x8 */
5036 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5037 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5039 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5040 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5041 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Two final bins refine the remaining sub-types.
5044 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5045 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag with CABAC; context depends on how
 * many neighbours used the 8x8 transform (h->neighbor_transform_size).
 */
5049 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5050 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes a reference index with CABAC (unary bin string, states 54+ctx).
 * Initial context derives from the left/top cached ref indices; in B
 * slices, neighbours coded as direct do not count (their cached ref is
 * ignored via direct_cache).
 * NOTE(review): the ref/ctx declarations, the ctx updates inside the
 * loop, the error handling and the return are on elided lines.
 */
5053 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5054 int refa = h->ref_cache[list][scan8[n] - 1];
5055 int refb = h->ref_cache[list][scan8[n] - 8];
5059 if( h->slice_type_nos == FF_B_TYPE) {
5060 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5062 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5071 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity bound: a conforming stream never needs 32 refs here.
5074 if(ref >= 32 /*h->ref_list[list]*/){
/**
 * Decodes one motion-vector-difference component with CABAC (UEG3:
 * context-coded unary prefix up to 9, then Exp-Golomb bypass suffix,
 * then a bypass sign bit).
 * @param l 0 for the x component (ctx base 40), 1 for y (ctx base 47);
 *          context also depends on the neighbouring |mvd| sum.
 * NOTE(review): the mvd declaration, ctx updates, the k shift counter of
 * the bypass suffix and the early return are on elided lines.
 */
5081 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5082 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5083 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5084 int ctxbase = (l == 0) ? 40 : 47;
5086 int ctx = (amvd>2) + (amvd>32);
5088 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary prefix, at most 9 context-coded bins.
5093 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb suffix in bypass mode; bounded to avoid runaway values.
5101 while( get_cabac_bypass( &h->cabac ) ) {
5105 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5110 if( get_cabac_bypass( &h->cabac ) )
// Sign bit decides between +mvd and -mvd.
5114 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context for block category `cat`:
 * ctx = (left nz) + 2*(top nz) + 4*cat, with the neighbour nonzero
 * status pulled from left_cbp/top_cbp for DC blocks and from
 * non_zero_count_cache for AC/luma blocks.
 * NOTE(review): the nza/nzb declarations, the DC/AC branch structure
 * and the folding of nza/nzb into ctx are on elided lines.
 */
5117 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// DC path: bit 8 of the packed cbp carries the DC coded flag.
5123 nza = h->left_cbp&0x100;
5124 nzb = h-> top_cbp&0x100;
5126 nza = (h->left_cbp>>(6+idx))&0x01;
5127 nzb = (h-> top_cbp>>(6+idx))&0x01;
5130 assert(cat == 1 || cat == 2 || cat == 4);
5131 nza = h->non_zero_count_cache[scan8[idx] - 1];
5132 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5141 return ctx + 4 * cat;
// Per-position context offsets for the last_significant_coeff_flag of
// 8x8 blocks (indexed by scan position 0..62); ASM-aligned because the
// x86 significance decoder reads it directly.
5144 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5145 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5146 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5147 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5148 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * CABAC residual decoding core: coded_block_flag, significance map,
 * then coefficient magnitudes/signs, written into block[] in scan order.
 * Always-inlined with compile-time is_dc so the DC/AC variants fold
 * their branches away.
 * NOTE(review): this excerpt omits interior lines (declarations of
 * index[], last, coeff_count init branches, the abs_level loop header,
 * several #else/#endif lines and closing braces).  Comments annotate
 * only the visible code.
 */
5151 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
// Context-table base offsets per category, [frame/field][cat].
5152 static const int significant_coeff_flag_offset[2][6] = {
5153 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5154 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5156 static const int last_coeff_flag_offset[2][6] = {
5157 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5158 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5160 static const int coeff_abs_level_m1_offset[6] = {
5161 227+0, 227+10, 227+20, 227+30, 227+39, 426
// Per-scan-position significance contexts for 8x8 blocks, frame/field.
5163 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5164 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5165 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5166 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5167 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5168 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5169 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5170 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5171 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5173 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5174 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5175 * map node ctx => cabac ctx for level=1 */
5176 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5177 /* map node ctx => cabac ctx for level>1 */
5178 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5179 static const uint8_t coeff_abs_level_transition[2][8] = {
5180 /* update node ctx after decoding a level=1 */
5181 { 1, 2, 3, 3, 4, 5, 6, 7 },
5182 /* update node ctx after decoding a level>1 */
5183 { 4, 4, 4, 4, 5, 6, 7, 7 }
5189 int coeff_count = 0;
5192 uint8_t *significant_coeff_ctx_base;
5193 uint8_t *last_coeff_ctx_base;
5194 uint8_t *abs_level_m1_ctx_base;
// Copy the CABAC state to a stack-local so the compiler can keep it in
// registers through the hot loops; written back before every return.
5197 #define CABAC_ON_STACK
5199 #ifdef CABAC_ON_STACK
5202 cc.range = h->cabac.range;
5203 cc.low = h->cabac.low;
5204 cc.bytestream= h->cabac.bytestream;
5206 #define CC &h->cabac
5210 /* cat: 0-> DC 16x16 n = 0
5211 * 1-> AC 16x16 n = luma4x4idx
5212 * 2-> Luma4x4 n = luma4x4idx
5213 * 3-> DC Chroma n = iCbCr
5214 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5215 * 5-> Luma8x8 n = 4 * luma8x8idx
5218 /* read coded block flag */
5219 if( is_dc || cat != 5 ) {
5220 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
// Whole block empty: record zero count and restore CABAC state.
5222 h->non_zero_count_cache[scan8[n]] = 0;
5224 #ifdef CABAC_ON_STACK
5225 h->cabac.range = cc.range ;
5226 h->cabac.low = cc.low ;
5227 h->cabac.bytestream= cc.bytestream;
5233 significant_coeff_ctx_base = h->cabac_state
5234 + significant_coeff_flag_offset[MB_FIELD][cat];
5235 last_coeff_ctx_base = h->cabac_state
5236 + last_coeff_flag_offset[MB_FIELD][cat];
5237 abs_level_m1_ctx_base = h->cabac_state
5238 + coeff_abs_level_m1_offset[cat];
// Significance map: records the scan positions of nonzero coefficients
// in index[]; last_coeff_flag terminates early.
5240 if( !is_dc && cat == 5 ) {
5241 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5242 for(last= 0; last < coefs; last++) { \
5243 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5244 if( get_cabac( CC, sig_ctx )) { \
5245 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5246 index[coeff_count++] = last; \
5247 if( get_cabac( CC, last_ctx ) ) { \
5253 if( last == max_coeff -1 ) {\
5254 index[coeff_count++] = last;\
5256 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5257 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5258 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5260 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5262 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5264 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5267 assert(coeff_count > 0);
// Record the nonzero status where later stages expect it (cbp_table for
// DC, 0x40<<n bits for 8x8 luma, nnz cache otherwise).
5271 h->cbp_table[h->mb_xy] |= 0x100;
5273 h->cbp_table[h->mb_xy] |= 0x40 << n;
5276 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5278 assert( cat == 1 || cat == 2 || cat == 4 );
5279 h->non_zero_count_cache[scan8[n]] = coeff_count;
// Magnitude/sign loop, highest scan position first.
5284 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5286 int j= scantable[index[--coeff_count]];
5288 if( get_cabac( CC, ctx ) == 0 ) {
// |level| == 1: sign via bypass; qmul applies dequant on the AC path.
5289 node_ctx = coeff_abs_level_transition[0][node_ctx];
5291 block[j] = get_cabac_bypass_sign( CC, -1);
5293 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5297 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5298 node_ctx = coeff_abs_level_transition[1][node_ctx];
// |level| > 1: truncated unary up to 15, then Exp-Golomb bypass escape.
5300 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5304 if( coeff_abs >= 15 ) {
5306 while( get_cabac_bypass( CC ) ) {
5312 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5318 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5320 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5323 } while( coeff_count );
5324 #ifdef CABAC_ON_STACK
// Write the register-cached CABAC state back to the context.
5325 h->cabac.range = cc.range ;
5326 h->cabac.low = cc.low ;
5327 h->cabac.bytestream= cc.bytestream;
5332 #ifndef CONFIG_SMALL
/* DC specialization of decode_cabac_residual_internal (is_dc == 1).
 * Having a dedicated wrapper lets the compiler constant-fold the is_dc
 * branches inside the (always-inlined) internal decoder.
 * NOTE(review): this listing is sampled (embedded numbering jumps), so the
 * closing brace is not visible in this excerpt. */
5333 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5334 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC specialization of decode_cabac_residual_internal (is_dc == 0);
 * counterpart of decode_cabac_residual_dc above, same constant-folding idea.
 * NOTE(review): closing brace elided from this sampled listing. */
5337 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5338 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatch for CABAC residual decoding: cat 0 (luma DC) and cat 3
 * (chroma DC) are DC blocks, everything else is non-DC.
 * NOTE(review): the #ifdef CONFIG_SMALL / #else / #endif lines that select
 * between the direct call (line 5344) and the dc/nondc dispatch
 * (lines 5346-5347) are elided from this sampled listing — only one of the
 * two code paths is compiled in the full source. */
5342 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5344 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5346 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5347 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the raster indices of the top and left neighbor macroblocks
 * (h->top_mb_xy, h->left_mb_xy[0]) for the current MB h->mb_xy.
 * Starts from the frame-coded defaults (above: -mb_stride, left: -1) and
 * then corrects them for MBAFF macroblock pairs and field pictures.
 * NOTE(review): this listing is sampled — the branch/brace structure around
 * the MBAFF block (lines 5358-5369) and the trailing braces are partly
 * elided, so control flow here is reconstructed from context. */
5351 static inline void compute_mb_neighbors(H264Context *h)
5353 MpegEncContext * const s = &h->s;
5354 const int mb_xy = h->mb_xy;
/* frame-coded defaults: MB directly above and directly to the left */
5355 h->top_mb_xy = mb_xy - s->mb_stride;
5356 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: work in units of macroblock pairs (top field row of the pair) */
5358 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5359 const int top_pair_xy = pair_xy - s->mb_stride;
5360 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5361 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5362 const int curr_mb_field_flag = MB_FIELD;
5363 const int bottom = (s->mb_y & 1);
/* field MB whose relevant neighbor is one pair row further up */
5365 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5366 h->top_mb_xy -= s->mb_stride;
/* mixed frame/field pair on the left: use the pair's top MB as left neighbor */
5368 if (!left_mb_field_flag == curr_mb_field_flag) {
5369 h->left_mb_xy[0] = pair_xy - 1;
/* field pictures: rows are interleaved, so "above" is two rows up */
5371 } else if (FIELD_PICTURE) {
5372 h->top_mb_xy -= s->mb_stride;
5378 * decodes a macroblock
5379 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock from a CABAC-coded slice: skip flags, mb_type,
 * intra prediction modes or inter motion data, CBP, and residuals.
 * Returns 0 on success, -1 on error (bad pred mode / reference index / dqp).
 * NOTE(review): this listing is sampled — the embedded line numbers jump
 * (e.g. 5382 -> 5384), so many statements, else-branches and closing braces
 * of the full function are not visible here; comments below describe only
 * what the visible lines establish. */
5381 static int decode_mb_cabac(H264Context *h) {
5382 MpegEncContext * const s = &h->s;
5384 int mb_type, partition_count, cbp = 0;
5385 int dct8x8_allowed= h->pps.transform_8x8_mode;
5387 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5389 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag decoding (P/B slices only; I slices have no skip) --- */
5390 if( h->slice_type_nos != FF_I_TYPE ) {
5392 /* a skipped mb needs the aff flag from the following mb */
5393 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5394 predict_field_decoding_flag(h);
5395 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5396 skip = h->next_mb_skipped;
5398 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5399 /* read skip flags */
/* MBAFF top MB of a pair: also peek at the bottom MB's skip flag, and read
 * the field/frame decoding flag if the bottom MB is not skipped */
5401 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5402 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5403 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5404 if(!h->next_mb_skipped)
5405 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB tables used later by the deblocking filter */
5410 h->cbp_table[mb_xy] = 0;
5411 h->chroma_pred_mode_table[mb_xy] = 0;
5412 h->last_qscale_diff = 0;
5419 if( (s->mb_y&1) == 0 )
5421 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5424 h->prev_mb_skipped = 0;
5426 compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type --- */
5428 if( h->slice_type_nos == FF_B_TYPE ) {
5429 mb_type = decode_cabac_mb_type_b( h );
5431 partition_count= b_mb_type_info[mb_type].partition_count;
5432 mb_type= b_mb_type_info[mb_type].type;
5435 goto decode_intra_mb;
5437 } else if( h->slice_type_nos == FF_P_TYPE ) {
5438 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5440 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5441 /* P_L0_D16x16, P_8x8 */
5442 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5444 /* P_L0_D8x16, P_L0_D16x8 */
5445 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5447 partition_count= p_mb_type_info[mb_type].partition_count;
5448 mb_type= p_mb_type_info[mb_type].type;
5450 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5451 goto decode_intra_mb;
/* I slice (or intra MB in P/B slice via decode_intra_mb label) */
5454 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5455 if(h->slice_type == FF_SI_TYPE && mb_type)
5457 assert(h->slice_type_nos == FF_I_TYPE);
5459 partition_count = 0;
5460 cbp= i_mb_type_info[mb_type].cbp;
5461 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5462 mb_type= i_mb_type_info[mb_type].type;
5465 mb_type |= MB_TYPE_INTERLACED;
5467 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw samples follow in the bitstream, bypassing CABAC --- */
5469 if(IS_INTRA_PCM(mb_type)) {
5472 // We assume these blocks are very rare so we do not optimize it.
5473 // FIXME The two following lines get the bitstream position in the cabac
5474 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5475 ptr= h->cabac.bytestream;
5476 if(h->cabac.low&0x1) ptr--;
5478 if(h->cabac.low&0x1FF) ptr--;
5481 // The pixels are stored in the same order as levels in h->mb array.
5482 memcpy(h->mb, ptr, 256); ptr+=256;
5484 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* restart the CABAC decoder after the raw PCM bytes */
5487 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5489 // All blocks are present
5490 h->cbp_table[mb_xy] = 0x1ef;
5491 h->chroma_pred_mode_table[mb_xy] = 0;
5492 // In deblocking, the quantizer is 0
5493 s->current_picture.qscale_table[mb_xy]= 0;
5494 // All coeffs are present
5495 memset(h->non_zero_count[mb_xy], 16, 16);
5496 s->current_picture.mb_type[mb_xy]= mb_type;
5497 h->last_qscale_diff = 0;
/* MBAFF pairs address references per field: temporarily double ref counts */
5502 h->ref_count[0] <<= 1;
5503 h->ref_count[1] <<= 1;
5506 fill_caches(h, mb_type, 0);
/* --- intra MB: decode 4x4/16x16 luma and chroma prediction modes --- */
5508 if( IS_INTRA( mb_type ) ) {
5510 if( IS_INTRA4x4( mb_type ) ) {
5511 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5512 mb_type |= MB_TYPE_8x8DCT;
/* 8x8 transform: one pred mode per 8x8 block, replicated to 4 cache cells */
5513 for( i = 0; i < 16; i+=4 ) {
5514 int pred = pred_intra_mode( h, i );
5515 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5516 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5519 for( i = 0; i < 16; i++ ) {
5520 int pred = pred_intra_mode( h, i );
5521 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5523 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5526 write_back_intra_pred_mode(h);
5527 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5529 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5530 if( h->intra16x16_pred_mode < 0 ) return -1;
5533 h->chroma_pred_mode_table[mb_xy] =
5534 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5536 pred_mode= check_intra_pred_mode( h, pred_mode );
5537 if( pred_mode < 0 ) return -1;
5538 h->chroma_pred_mode= pred_mode;
/* --- inter MB with 4 partitions (8x8 sub-macroblocks) --- */
5540 } else if( partition_count == 4 ) {
5541 int i, j, sub_partition_count[4], list, ref[2][4];
5543 if( h->slice_type_nos == FF_B_TYPE ) {
5544 for( i = 0; i < 4; i++ ) {
5545 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5546 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5547 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* any direct 8x8 sub-block: derive its motion from co-located data */
5549 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5550 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5551 pred_direct_motion(h, &mb_type);
5552 h->ref_cache[0][scan8[4]] =
5553 h->ref_cache[1][scan8[4]] =
5554 h->ref_cache[0][scan8[12]] =
5555 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5556 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5557 for( i = 0; i < 4; i++ )
5558 if( IS_DIRECT(h->sub_mb_type[i]) )
5559 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5563 for( i = 0; i < 4; i++ ) {
5564 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5565 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5566 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* decode reference indices per 8x8 block, validating against ref_count */
5570 for( list = 0; list < h->list_count; list++ ) {
5571 for( i = 0; i < 4; i++ ) {
5572 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5573 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5574 if( h->ref_count[list] > 1 ){
5575 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5576 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5577 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5585 h->ref_cache[list][ scan8[4*i]+1 ]=
5586 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5591 dct8x8_allowed = get_dct8x8_allowed(h);
/* decode motion vector differences, then store mv/mvd into the caches,
 * replicating per sub-partition shape (8x8 / 8x4 / 4x8 / 4x4) */
5593 for(list=0; list<h->list_count; list++){
5595 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5596 if(IS_DIRECT(h->sub_mb_type[i])){
5597 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5601 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5602 const int sub_mb_type= h->sub_mb_type[i];
5603 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5604 for(j=0; j<sub_partition_count[i]; j++){
5607 const int index= 4*i + block_width*j;
5608 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5609 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5610 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5612 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5613 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5614 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5616 if(IS_SUB_8X8(sub_mb_type)){
5618 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5620 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5623 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5625 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5626 }else if(IS_SUB_8X4(sub_mb_type)){
5627 mv_cache[ 1 ][0]= mx;
5628 mv_cache[ 1 ][1]= my;
5630 mvd_cache[ 1 ][0]= mx - mpx;
5631 mvd_cache[ 1 ][1]= my - mpy;
5632 }else if(IS_SUB_4X8(sub_mb_type)){
5633 mv_cache[ 8 ][0]= mx;
5634 mv_cache[ 8 ][1]= my;
5636 mvd_cache[ 8 ][0]= mx - mpx;
5637 mvd_cache[ 8 ][1]= my - mpy;
5639 mv_cache[ 0 ][0]= mx;
5640 mv_cache[ 0 ][1]= my;
5642 mvd_cache[ 0 ][0]= mx - mpx;
5643 mvd_cache[ 0 ][1]= my - mpy;
/* sub-block not predicted from this list: zero the mv/mvd cache cells */
5646 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5647 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5648 p[0] = p[1] = p[8] = p[9] = 0;
5649 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct 16x16 MB --- */
5653 } else if( IS_DIRECT(mb_type) ) {
5654 pred_direct_motion(h, &mb_type);
5655 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5656 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5657 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter MB with 1 or 2 partitions: 16x16, 16x8 or 8x16 --- */
5659 int list, mx, my, i, mpx, mpy;
5660 if(IS_16X16(mb_type)){
5661 for(list=0; list<h->list_count; list++){
5662 if(IS_DIR(mb_type, 0, list)){
5664 if(h->ref_count[list] > 1){
5665 ref= decode_cabac_mb_ref(h, list, 0);
5666 if(ref >= (unsigned)h->ref_count[list]){
5667 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5672 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5674 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5676 for(list=0; list<h->list_count; list++){
5677 if(IS_DIR(mb_type, 0, list)){
5678 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5680 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5681 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5682 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5684 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5685 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5687 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5690 else if(IS_16X8(mb_type)){
/* two 16x8 partitions: refs first, then mvds (cache rows offset by 16*i) */
5691 for(list=0; list<h->list_count; list++){
5693 if(IS_DIR(mb_type, i, list)){
5695 if(h->ref_count[list] > 1){
5696 ref= decode_cabac_mb_ref( h, list, 8*i );
5697 if(ref >= (unsigned)h->ref_count[list]){
5698 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5703 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5705 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5708 for(list=0; list<h->list_count; list++){
5710 if(IS_DIR(mb_type, i, list)){
5711 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5712 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5713 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5714 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5716 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5717 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5719 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5720 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
/* two 8x16 partitions: same pattern, cache columns offset by 2*i */
5725 assert(IS_8X16(mb_type));
5726 for(list=0; list<h->list_count; list++){
5728 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5730 if(h->ref_count[list] > 1){
5731 ref= decode_cabac_mb_ref( h, list, 4*i );
5732 if(ref >= (unsigned)h->ref_count[list]){
5733 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5738 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5740 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5743 for(list=0; list<h->list_count; list++){
5745 if(IS_DIR(mb_type, i, list)){
5746 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5747 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5748 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5750 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5751 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5752 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5754 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5755 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* --- write motion back, then CBP and residuals --- */
5762 if( IS_INTER( mb_type ) ) {
5763 h->chroma_pred_mode_table[mb_xy] = 0;
5764 write_back_motion( h, mb_type );
5767 if( !IS_INTRA16x16( mb_type ) ) {
5768 cbp = decode_cabac_mb_cbp_luma( h );
5770 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5773 h->cbp_table[mb_xy] = h->cbp = cbp;
5775 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5776 if( decode_cabac_mb_transform_size( h ) )
5777 mb_type |= MB_TYPE_8x8DCT;
5779 s->current_picture.mb_type[mb_xy]= mb_type;
5781 if( cbp || IS_INTRA16x16( mb_type ) ) {
5782 const uint8_t *scan, *scan8x8, *dc_scan;
5783 const uint32_t *qmul;
/* pick field or frame scan tables (the *_q0 variants when qscale == 0) */
5786 if(IS_INTERLACED(mb_type)){
5787 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5788 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5789 dc_scan= luma_dc_field_scan;
5791 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5792 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5793 dc_scan= luma_dc_zigzag_scan;
/* qscale delta; INT_MIN flags a CABAC decode failure */
5796 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5797 if( dqp == INT_MIN ){
5798 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale back into [0,51] after adding the delta */
5802 if(((unsigned)s->qscale) > 51){
5803 if(s->qscale<0) s->qscale+= 52;
5804 else s->qscale-= 52;
5806 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5807 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Intra16x16: DC block (cat 0), then 15-coeff AC blocks (cat 1) per 4x4 */
5809 if( IS_INTRA16x16( mb_type ) ) {
5811 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5812 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5815 qmul = h->dequant4_coeff[0][s->qscale];
5816 for( i = 0; i < 16; i++ ) {
5817 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5818 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5821 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other MB types: per-8x8 luma, either one 8x8 (cat 5) or four 4x4 (cat 2) */
5825 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5826 if( cbp & (1<<i8x8) ) {
5827 if( IS_8x8DCT(mb_type) ) {
5828 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5829 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5831 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5832 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5833 const int index = 4*i8x8 + i4x4;
5834 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5836 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5837 //STOP_TIMER("decode_residual")
/* 8x8 block coded-bit clear: zero its 2x2 patch of the nnz cache */
5841 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5842 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cat 3) and chroma AC (cat 4) for both Cb and Cr */
5849 for( c = 0; c < 2; c++ ) {
5850 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5851 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5857 for( c = 0; c < 2; c++ ) {
5858 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5859 for( i = 0; i < 4; i++ ) {
5860 const int index = 16 + 4 * c + i;
5861 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5862 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
/* no chroma AC coded: clear the chroma cells of the nnz cache */
5866 uint8_t * const nnz= &h->non_zero_count_cache[0];
5867 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5868 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residuals at all: clear whole nnz cache and reset dqp history */
5871 uint8_t * const nnz= &h->non_zero_count_cache[0];
5872 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5873 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5874 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5875 h->last_qscale_diff = 0;
5878 s->current_picture.qscale_table[mb_xy]= s->qscale;
5879 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling performed above */
5882 h->ref_count[0] >>= 1;
5883 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge: derives alpha/beta thresholds from qp
 * plus the slice offsets (tables are biased by +52 so negative indices work),
 * builds the per-4-line tc0 clipping values from the boundary strengths bS,
 * and calls the DSP horizontal-loop-filter. The *_intra variant (line 5903)
 * is the bS==4 strong-filter path.
 * NOTE(review): the branch selecting between the two DSP calls (around line
 * 5894-5902) is elided from this sampled listing. */
5890 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5891 const int index_a = qp + h->slice_alpha_c0_offset;
5892 const int alpha = (alpha_table+52)[index_a];
5893 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5897 tc[0] = (tc0_table+52)[index_a][bS[0]];
5898 tc[1] = (tc0_table+52)[index_a][bS[1]];
5899 tc[2] = (tc0_table+52)[index_a][bS[2]];
5900 tc[3] = (tc0_table+52)[index_a][bS[3]];
5901 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5903 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge; same structure as filter_mb_edgev but
 * tc values are tc0+1 (chroma clipping per the H.264 filter) and the chroma
 * DSP filters are used.
 * NOTE(review): the branch between the two DSP calls is elided from this
 * sampled listing. */
5906 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5907 const int index_a = qp + h->slice_alpha_c0_offset;
5908 const int alpha = (alpha_table+52)[index_a];
5909 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5913 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5914 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5915 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5916 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5917 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5919 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Scalar deblocking of a vertical luma MB edge in the MBAFF case, one pixel
 * row at a time (16 rows). Takes 8 boundary strengths and two qp values
 * because the two sides of the edge may belong to different field/frame MBs.
 * bS < 4 uses the normal tc0-clipped filter; bS == 4 (else branch) uses the
 * strong intra filter.
 * NOTE(review): this listing is sampled — several statements (e.g. the tc
 * derivation used at line 5970 and some brace lines) are elided. */
5923 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5925 for( i = 0; i < 16; i++, pix += stride) {
5931 int bS_index = (i >> 1);
5934 bS_index |= (i & 1);
5937 if( bS[bS_index] == 0 ) {
/* select which of the two qp values applies to this row */
5941 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5942 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5943 alpha = (alpha_table+52)[index_a];
5944 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filter (bS 1..3): clip the delta to +-tc */
5946 if( bS[bS_index] < 4 ) {
5947 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5948 const int p0 = pix[-1];
5949 const int p1 = pix[-2];
5950 const int p2 = pix[-3];
5951 const int q0 = pix[0];
5952 const int q1 = pix[1];
5953 const int q2 = pix[2];
5955 if( FFABS( p0 - q0 ) < alpha &&
5956 FFABS( p1 - p0 ) < beta &&
5957 FFABS( q1 - q0 ) < beta ) {
/* p1/q1 corrections when the second-row sample is smooth enough */
5961 if( FFABS( p2 - p0 ) < beta ) {
5962 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5965 if( FFABS( q2 - q0 ) < beta ) {
5966 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5970 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5971 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5972 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5973 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter (bS == 4): unclipped averaging of up to 4 taps per side */
5976 const int p0 = pix[-1];
5977 const int p1 = pix[-2];
5978 const int p2 = pix[-3];
5980 const int q0 = pix[0];
5981 const int q1 = pix[1];
5982 const int q2 = pix[2];
5984 if( FFABS( p0 - q0 ) < alpha &&
5985 FFABS( p1 - p0 ) < beta &&
5986 FFABS( q1 - q0 ) < beta ) {
5988 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5989 if( FFABS( p2 - p0 ) < beta)
5991 const int p3 = pix[-4];
5993 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5994 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5995 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5998 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6000 if( FFABS( q2 - q0 ) < beta)
6002 const int q3 = pix[3];
6004 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6005 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6006 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6009 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* edge not flat enough for the 3-tap update: weak 2-tap smoothing only */
6013 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6014 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6016 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Scalar MBAFF deblocking of a vertical chroma MB edge, one pixel row at a
 * time (8 rows). Chroma uses only p1/p0/q0/q1 and tc = tc0 + 1; bS == 4
 * (else branch) applies the 2-tap strong filter.
 * NOTE(review): sampled listing — the bS_index derivation (around lines
 * 6024-6030) and several braces are elided. */
6021 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6023 for( i = 0; i < 8; i++, pix += stride) {
6031 if( bS[bS_index] == 0 ) {
/* select which of the two qp values applies to this row */
6035 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6036 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6037 alpha = (alpha_table+52)[index_a];
6038 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal chroma filter (bS 1..3): delta clipped to +-(tc0+1) */
6040 if( bS[bS_index] < 4 ) {
6041 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6042 const int p0 = pix[-1];
6043 const int p1 = pix[-2];
6044 const int q0 = pix[0];
6045 const int q1 = pix[1];
6047 if( FFABS( p0 - q0 ) < alpha &&
6048 FFABS( p1 - p0 ) < beta &&
6049 FFABS( q1 - q0 ) < beta ) {
6050 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6052 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6053 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6054 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong chroma filter (bS == 4): unclipped 2-tap update of p0/q0 */
6057 const int p0 = pix[-1];
6058 const int p1 = pix[-2];
6059 const int q0 = pix[0];
6060 const int q1 = pix[1];
6062 if( FFABS( p0 - q0 ) < alpha &&
6063 FFABS( p1 - p0 ) < beta &&
6064 FFABS( q1 - q0 ) < beta ) {
6066 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6067 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6068 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; identical threshold/tc derivation to
 * filter_mb_edgev but dispatches to the vertical-direction DSP filters
 * (h264_v_loop_filter_*).
 * NOTE(review): the branch between the two DSP calls is elided from this
 * sampled listing. */
6074 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6075 const int index_a = qp + h->slice_alpha_c0_offset;
6076 const int alpha = (alpha_table+52)[index_a];
6077 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6081 tc[0] = (tc0_table+52)[index_a][bS[0]];
6082 tc[1] = (tc0_table+52)[index_a][bS[1]];
6083 tc[2] = (tc0_table+52)[index_a][bS[2]];
6084 tc[3] = (tc0_table+52)[index_a][bS[3]];
6085 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6087 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; tc values are tc0+1 as in
 * filter_mb_edgecv, dispatching to the vertical chroma DSP filters.
 * NOTE(review): the branch between the two DSP calls is elided from this
 * sampled listing. */
6091 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6092 const int index_a = qp + h->slice_alpha_c0_offset;
6093 const int alpha = (alpha_table+52)[index_a];
6094 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6098 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6099 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6100 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6101 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6102 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6104 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking of one macroblock. Falls back to the full filter_mb()
 * for cases it cannot handle (first row/column, per-MB chroma qp deltas,
 * slice-boundary filtering mode 2, or when CODEC_FLAG2_FAST is not set —
 * see the FIXME at line 6117). Otherwise computes averaged edge qps, skips
 * the MB entirely when all qps are below the threshold, uses fixed bS
 * values for intra MBs, and the DSP h264_loop_filter_strength() for inter.
 * NOTE(review): sampled listing — the FILTER() invocations and the tail of
 * the function (after line 6203) are elided. */
6108 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6109 MpegEncContext * const s = &h->s;
6110 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6112 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* cases the fast path cannot handle: delegate to the full filter */
6116 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6117 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6118 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6119 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6120 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6123 assert(!FRAME_MBAFF);
/* average the qp of this MB with its left (qp0) and top (qp1) neighbors */
6125 mb_type = s->current_picture.mb_type[mb_xy];
6126 qp = s->current_picture.qscale_table[mb_xy];
6127 qp0 = s->current_picture.qscale_table[mb_xy-1];
6128 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6129 qpc = get_chroma_qp( h, 0, qp );
6130 qpc0 = get_chroma_qp( h, 0, qp0 );
6131 qpc1 = get_chroma_qp( h, 0, qp1 );
6132 qp0 = (qp + qp0 + 1) >> 1;
6133 qp1 = (qp + qp1 + 1) >> 1;
6134 qpc0 = (qpc + qpc0 + 1) >> 1;
6135 qpc1 = (qpc + qpc1 + 1) >> 1;
6136 qp_thresh = 15 - h->slice_alpha_c0_offset;
/* all qps under threshold => alpha/beta are 0, nothing would be filtered */
6137 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6138 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MB: fixed strengths — 4 on MB edges (3 horizontal in field
 * pictures), 3 on internal edges; 8x8 DCT skips the odd internal edges */
6141 if( IS_INTRA(mb_type) ) {
6142 int16_t bS4[4] = {4,4,4,4};
6143 int16_t bS3[4] = {3,3,3,3};
6144 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6145 if( IS_8x8DCT(mb_type) ) {
6146 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6147 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6148 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6149 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6151 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6152 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6153 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6154 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6155 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6156 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6157 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6158 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: only the MB edge and the middle edge exist (8x8 chroma plane) */
6160 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6161 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6162 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6163 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6164 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6165 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6166 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6167 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute bS per edge, via uint64 aliasing for bulk assignment */
6170 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6171 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6173 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6175 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6177 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6178 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6179 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6180 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6182 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6183 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6184 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6185 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* intra neighbors force bS 4 (or 3 across a field-picture MB row) */
6187 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6188 bSv[0][0] = 0x0004000400040004ULL;
6189 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6190 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* apply one edge in direction dir (0=vertical, 1=horizontal) if any bS != 0;
 * edge 0 uses the neighbor-averaged qp (qp0/qp1), inner edges the MB qp */
6192 #define FILTER(hv,dir,edge)\
6193 if(bSv[dir][edge]) {\
6194 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6196 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6197 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6203 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Deblock one macroblock along one direction (dir==0: vertical edges,
 * dir==1: horizontal edges), computing boundary strengths (bS) per edge
 * and then calling the per-edge filter helpers.
 * NOTE(review): this excerpt has elided lines; comments describe only what
 * the visible code establishes.
 */
6223 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6224 MpegEncContext * const s = &h->s;
// mbm_xy: the neighbouring macroblock across the first edge (left for
// vertical filtering, top for horizontal filtering).
6226 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6227 const int mbm_type = s->current_picture.mb_type[mbm_xy];
// ref2frm maps reference indices to frame numbers for the current and the
// neighbouring slice; offset differs for MBAFF (paired-field) coding.
6228 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6229 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
// 0xFFFF marks "no slice": skip edge 0 when the neighbour is outside any slice.
6230 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
// A 16x16 skipped MB has a single motion partition, so only the MB-boundary
// edge (edge 0) needs mv-based bS; otherwise all 4 edges are examined.
6232 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6233 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6234 // how often to recheck mv-based bS when iterating between edges
6235 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6236 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6237 // how often to recheck mv-based bS when iterating along each edge
6238 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6240 if (first_vertical_edge_done) {
// deblocking_filter==2: do not filter across slice boundaries.
6244 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
// MBAFF special case: a frame MB above an interlaced MB pair must have its
// top edge filtered twice, once per field.
6247 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6248 && !IS_INTERLACED(mb_type)
6249 && IS_INTERLACED(mbm_type)
6251 // This is a special case in the norm where the filtering must
6252 // be done twice (one each of the field) even if we are in a
6253 // frame macroblock.
6255 static const int nnz_idx[4] = {4,5,6,3};
6256 unsigned int tmp_linesize = 2 * linesize;
6257 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6258 int mbn_xy = mb_xy - 2 * s->mb_stride;
// Filter against each of the two field MBs of the pair above.
6263 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6264 if( IS_INTRA(mb_type) ||
6265 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6266 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6268 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6269 for( i = 0; i < 4; i++ ) {
6270 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6271 mbn_nnz[nnz_idx[i]] != 0 )
6277 // Do not use s->qscale as luma quantizer because it has not the same
6278 // value in IPCM macroblocks.
6279 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6280 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6281 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6282 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
// Chroma QP is averaged between the two MBs sharing the edge.
6283 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6284 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6285 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6286 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Main loop over the (up to 4) internal/boundary edges in this direction.
6293 for( edge = start; edge < edges; edge++ ) {
6294 /* mbn_xy: neighbor macroblock */
6295 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6296 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6297 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// 8x8 transform: odd internal edges are not filtered.
6301 if( (edge&1) && IS_8x8DCT(mb_type) )
// Intra MBs force strong filtering on all edges.
6304 if( IS_INTRA(mb_type) ||
6305 IS_INTRA(mbn_type) ) {
6308 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6309 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6318 bS[0] = bS[1] = bS[2] = bS[3] = value;
// Non-intra: bS depends on coded residual and motion differences.
6323 if( edge & mask_edge ) {
6324 bS[0] = bS[1] = bS[2] = bS[3] = 0;
// Mixed frame/field neighbours in MBAFF always get bS=1.
6327 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6328 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6331 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
// Whole edge shares one partition on each side: compute a single bS value.
6332 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6333 int bn_idx= b_idx - (dir ? 8:1);
// bS=1 if references differ or any MV component differs by >= 1 luma
// sample horizontally (4 quarter-pel) or >= mvy_limit vertically.
6336 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6337 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6338 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6339 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// For B slices also compare against the opposite list (lists may be swapped).
6342 if(h->slice_type_nos == FF_B_TYPE && v){
6344 for( l = 0; !v && l < 2; l++ ) {
6346 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6347 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6348 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6352 bS[0] = bS[1] = bS[2] = bS[3] = v;
// Otherwise compute bS per 4x4 sub-edge.
6358 for( i = 0; i < 4; i++ ) {
6359 int x = dir == 0 ? edge : i;
6360 int y = dir == 0 ? i : edge;
6361 int b_idx= 8 + 4 + x + 8*y;
6362 int bn_idx= b_idx - (dir ? 8:1);
// bS=2 when either side has coded coefficients.
6364 if( h->non_zero_count_cache[b_idx] |
6365 h->non_zero_count_cache[bn_idx] ) {
6371 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6372 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6373 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6374 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6380 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6382 for( l = 0; l < 2; l++ ) {
6384 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6385 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6386 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// All-zero bS: nothing to filter on this edge.
6395 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6400 // Do not use s->qscale as luma quantizer because it has not the same
6401 // value in IPCM macroblocks.
6402 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6403 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6404 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6405 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// dir==0: vertical edges (horizontal filtering offsets along x).
6407 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6408 if( (edge&1) == 0 ) {
6409 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6410 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6411 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6412 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// dir==1: horizontal edges (offsets along y by linesize).
6415 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6416 if( (edge&1) == 0 ) {
6417 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6418 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6419 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6420 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Full (non-fast-path) deblocking of one macroblock: handles the qp-based
 * early skip, CAVLC 8x8-DCT NNZ fixup, the MBAFF special first vertical
 * edge, then delegates both directions to filter_mb_dir().
 * NOTE(review): excerpt has elided lines; comments reflect visible code only.
 */
6426 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6427 MpegEncContext * const s = &h->s;
6428 const int mb_xy= mb_x + mb_y*s->mb_stride;
6429 const int mb_type = s->current_picture.mb_type[mb_xy];
// Field MBs use a tighter vertical MV threshold (2 = half luma sample).
6430 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6431 int first_vertical_edge_done = 0;
6434 //for sufficiently low qp, filtering wouldn't do anything
6435 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6437 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6438 int qp = s->current_picture.qscale_table[mb_xy];
// Skip the whole MB when this MB and both neighbours average below threshold.
6440 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6441 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6446 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6447 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6448 int top_type, left_type[2];
6449 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6450 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6451 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// Rebuild the NNZ cache edges from the neighbours' coded-block patterns.
6453 if(IS_8x8DCT(top_type)){
6454 h->non_zero_count_cache[4+8*0]=
6455 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6456 h->non_zero_count_cache[6+8*0]=
6457 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6459 if(IS_8x8DCT(left_type[0])){
6460 h->non_zero_count_cache[3+8*1]=
6461 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6463 if(IS_8x8DCT(left_type[1])){
6464 h->non_zero_count_cache[3+8*3]=
6465 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// For the current MB, spread each 8x8 cbp bit over its four 4x4 cache slots.
6468 if(IS_8x8DCT(mb_type)){
6469 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6470 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6472 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6473 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6475 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6476 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6478 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6479 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6484 // left mb is in picture
6485 && h->slice_table[mb_xy-1] != 0xFFFF
6486 // and current and left pair do not have the same interlaced type
6487 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6488 // and left mb is in the same slice if deblocking_filter == 2
6489 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6490 /* First vertical edge is different in MBAFF frames
6491 * There are 8 different bS to compute and 2 different Qp
6493 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6494 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6499 int mb_qp, mbn0_qp, mbn1_qp;
6501 first_vertical_edge_done = 1;
6503 if( IS_INTRA(mb_type) )
6504 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
// Otherwise derive each of the 8 bS values from the matching left field MB.
6506 for( i = 0; i < 8; i++ ) {
6507 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6509 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6511 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6512 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6513 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6515 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Two QP sets: one per left field MB, averaged with the current MB's QP.
6522 mb_qp = s->current_picture.qscale_table[mb_xy];
6523 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6524 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6525 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6526 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6527 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6528 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6529 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6530 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6531 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6532 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6533 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6534 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6537 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6538 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6539 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6540 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6541 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Remaining edges: vertical (dir 0) then horizontal (dir 1).
6545 for( dir = 0; dir < 2; dir++ )
6546 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6548 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6549 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode the macroblocks of one slice (thread entry point).
 * Chooses the CABAC or CAVLC macroblock loop depending on the PPS, runs
 * error-resilience bookkeeping via ff_er_add_slice(), and returns 0 on a
 * cleanly terminated slice or -1 on a bitstream error.
 * Fix: line 6715 contained corrupted characters ("s->?gb" / "s->gb?.") —
 * restored to "s->gb" / "s->gb." to match the sibling test on line 6716.
 */
6553 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6554 H264Context *h = *(void**)arg;
6555 MpegEncContext * const s = &h->s;
// Partitioned frames only report AC errors/ends to the error concealer.
6556 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6560 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6561 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6563 if( h->pps.cabac ) {
6567 align_get_bits( &s->gb );
// Hand the byte-aligned remainder of the slice to the CABAC decoder.
6570 ff_init_cabac_states( &h->cabac);
6571 ff_init_cabac_decoder( &h->cabac,
6572 s->gb.buffer + get_bits_count(&s->gb)/8,
6573 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6574 /* calculate pre-state */
6575 for( i= 0; i < 460; i++ ) {
6577 if( h->slice_type_nos == FF_I_TYPE )
6578 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6580 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6583 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6585 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// CABAC macroblock loop.
6590 int ret = decode_mb_cabac(h);
6592 //STOP_TIMER("decode_mb_cabac")
6594 if(ret>=0) hl_decode_mb(h);
6596 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6599 ret = decode_mb_cabac(h);
6601 if(ret>=0) hl_decode_mb(h);
6604 eos = get_cabac_terminate( &h->cabac );
// Allow up to 2 bytes of overread before declaring a bitstream error.
6606 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6607 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6608 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6612 if( ++s->mb_x >= s->mb_width ) {
6614 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6616 if(FIELD_OR_MBAFF_PICTURE) {
6621 if( eos || s->mb_y >= s->mb_height ) {
6622 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6623 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC macroblock loop.
6630 int ret = decode_mb_cavlc(h);
6632 if(ret>=0) hl_decode_mb(h);
6634 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6636 ret = decode_mb_cavlc(h);
6638 if(ret>=0) hl_decode_mb(h);
6643 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6644 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6649 if(++s->mb_x >= s->mb_width){
6651 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6653 if(FIELD_OR_MBAFF_PICTURE) {
6656 if(s->mb_y >= s->mb_height){
6657 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Slice must end exactly at the end of the bitstream; otherwise error.
6659 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6660 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6664 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// Out of bits with no pending skip-run: slice end (clean or error).
6671 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6672 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6673 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6674 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6678 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6687 for(;s->mb_y < s->mb_height; s->mb_y++){
6688 for(;s->mb_x < s->mb_width; s->mb_x++){
6689 int ret= decode_mb(h);
6694 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6695 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6700 if(++s->mb_x >= s->mb_width){
6702 if(++s->mb_y >= s->mb_height){
6703 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6704 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6708 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6715 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6716 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6721 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6728 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6731 return -1; //not reached
/**
 * Parse a picture-timing SEI message (H.264 Annex D, D.2.2): CPB/DPB
 * delays, pic_struct, and optional clock timestamps. Values other than
 * sei_pic_struct are parsed and discarded.
 */
6734 static int decode_picture_timing(H264Context *h){
6735 MpegEncContext * const s = &h->s;
// Delay fields are only present when HRD parameters were signalled in the SPS.
6736 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6737 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6738 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6740 if(h->sps.pic_struct_present_flag){
6741 unsigned int i, num_clock_ts;
6742 h->sei_pic_struct = get_bits(&s->gb, 4);
// Reject reserved pic_struct values (> frame tripling).
6744 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6747 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6749 for (i = 0 ; i < num_clock_ts ; i++){
6750 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6751 unsigned int full_timestamp_flag;
6752 skip_bits(&s->gb, 2); /* ct_type */
6753 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6754 skip_bits(&s->gb, 5); /* counting_type */
6755 full_timestamp_flag = get_bits(&s->gb, 1);
6756 skip_bits(&s->gb, 1); /* discontinuity_flag */
6757 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6758 skip_bits(&s->gb, 8); /* n_frames */
6759 if(full_timestamp_flag){
6760 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6761 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6762 skip_bits(&s->gb, 5); /* hours_value 0..23 */
// Without a full timestamp, each time component is individually flagged.
6764 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6765 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6766 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6767 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6768 if(get_bits(&s->gb, 1)) /* hours_flag */
6769 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6773 if(h->sps.time_offset_length > 0)
6774 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user-data SEI message. The 16-byte UUID plus
 * payload is read into a bounded local buffer; if the payload identifies
 * an x264 encoder, the build number is stored in h->x264_build (used
 * elsewhere for bug workarounds). Remaining payload bytes are skipped.
 */
6781 static int decode_unregistered_user_data(H264Context *h, int size){
6782 MpegEncContext * const s = &h->s;
6783 uint8_t user_data[16+256]; // 16-byte UUID + up to 256 payload bytes
// Read at most sizeof(user_data)-1 bytes so the buffer can be NUL-terminated.
6789 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6790 user_data[i]= get_bits(&s->gb, 8);
// Payload text starts after the 16-byte UUID.
6794 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6795 if(e==1 && build>=0)
6796 h->x264_build= build;
6798 if(s->avctx->debug & FF_DEBUG_BUGS)
6799 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Skip any payload bytes beyond the local buffer capacity.
6802 skip_bits(&s->gb, 8);
/**
 * Parse all SEI messages in the current NAL unit. Each message is a
 * (type, size) pair where both fields are encoded as a run of 0xFF bytes
 * plus a final byte; unhandled types are skipped.
 */
6807 static int decode_sei(H264Context *h){
6808 MpegEncContext * const s = &h->s;
// Loop while at least one more (type, size) header can fit.
6810 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// Accumulate type: each 0xFF byte adds 255, the final byte terminates.
6815 type+= show_bits(&s->gb, 8);
6816 }while(get_bits(&s->gb, 8) == 255);
// Same run-length scheme for the payload size (in bytes).
6820 size+= show_bits(&s->gb, 8);
6821 }while(get_bits(&s->gb, 8) == 255);
6824 case 1: // Picture timing SEI
6825 if(decode_picture_timing(h) < 0)
6829 if(decode_unregistered_user_data(h, size) < 0)
// Default: skip the payload of unhandled SEI types.
6833 skip_bits(&s->gb, 8*size);
6836 //FIXME check bits here
6837 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters (H.264 Annex E).
 * Only the delay-field lengths and time_offset_length are stored in the
 * SPS; per-CPB rate/size values are parsed and discarded.
 * @return 0 on success, negative on an invalid cpb_count.
 */
6843 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6844 MpegEncContext * const s = &h->s;
6846 cpb_count = get_ue_golomb_31(&s->gb) + 1;
// Spec limits cpb_cnt_minus1 to 0..31.
6848 if(cpb_count > 32U){
6849 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6853 get_bits(&s->gb, 4); /* bit_rate_scale */
6854 get_bits(&s->gb, 4); /* cpb_size_scale */
6855 for(i=0; i<cpb_count; i++){
6856 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6857 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6858 get_bits1(&s->gb); /* cbr_flag */
6860 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
// These lengths are needed later by decode_picture_timing().
6861 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6862 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6863 sps->time_offset_length = get_bits(&s->gb, 5);
/**
 * Parse VUI (video usability information) parameters from the SPS
 * (H.264 Annex E): sample aspect ratio, timing info, HRD parameters,
 * pic_struct flag and bitstream restrictions. Most display-hint fields
 * are parsed and discarded.
 */
6867 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6868 MpegEncContext * const s = &h->s;
6869 int aspect_ratio_info_present_flag;
6870 unsigned int aspect_ratio_idc;
6872 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6874 if( aspect_ratio_info_present_flag ) {
6875 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: explicit 16-bit numerator/denominator follow.
6876 if( aspect_ratio_idc == EXTENDED_SAR ) {
6877 sps->sar.num= get_bits(&s->gb, 16);
6878 sps->sar.den= get_bits(&s->gb, 16);
// Otherwise look up one of the predefined aspect ratios.
6879 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6880 sps->sar= pixel_aspect[aspect_ratio_idc];
6882 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6889 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6891 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6892 get_bits1(&s->gb); /* overscan_appropriate_flag */
6895 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6896 get_bits(&s->gb, 3); /* video_format */
6897 get_bits1(&s->gb); /* video_full_range_flag */
6898 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6899 get_bits(&s->gb, 8); /* colour_primaries */
6900 get_bits(&s->gb, 8); /* transfer_characteristics */
6901 get_bits(&s->gb, 8); /* matrix_coefficients */
6905 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6906 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6907 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6910 sps->timing_info_present_flag = get_bits1(&s->gb);
6911 if(sps->timing_info_present_flag){
6912 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6913 sps->time_scale = get_bits_long(&s->gb, 32);
6914 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameter sets share the same syntax.
6917 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6918 if(sps->nal_hrd_parameters_present_flag)
6919 if(decode_hrd_parameters(h, sps) < 0)
6921 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6922 if(sps->vcl_hrd_parameters_present_flag)
6923 if(decode_hrd_parameters(h, sps) < 0)
6925 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6926 get_bits1(&s->gb); /* low_delay_hrd_flag */
6927 sps->pic_struct_present_flag = get_bits1(&s->gb);
6929 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6930 if(sps->bitstream_restriction_flag){
6931 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6932 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6933 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6934 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6935 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6936 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6937 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// Sanity bound: DPB cannot hold more than 16 frames.
6939 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6940 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one quantization scaling list of 'size' entries (16 or 64) into
 * 'factors' in raster order via the appropriate zigzag scan. If the list
 * is absent, 'fallback_list' is copied; if the first delta makes the
 * first entry zero, the JVT default list ('jvt_list') is used instead.
 */
6948 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6949 const uint8_t *jvt_list, const uint8_t *fallback_list){
6950 MpegEncContext * const s = &h->s;
6951 int i, last = 8, next = 8; // spec-mandated initial prediction value
6952 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6953 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6954 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6956 for(i=0;i<size;i++){
// Each entry is the previous value plus a signed delta, modulo 256.
6958 next = (last + get_se_golomb(&s->gb)) & 0xff;
6959 if(!i && !next){ /* matrix not written, we use the preset one */
6960 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// A zero delta repeats the previous value (run-length behaviour).
6963 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Fallback chain per the spec: a PPS list falls back to the SPS list when
 * the SPS carried one, otherwise to the JVT defaults; within a set, each
 * chroma list falls back to the previously decoded list.
 */
6967 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6968 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6969 MpegEncContext * const s = &h->s;
// When decoding a PPS, fall back to SPS matrices if the SPS provided them.
6970 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6971 const uint8_t *fallback[4] = {
6972 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6973 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6974 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6975 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6977 if(get_bits1(&s->gb)){ // scaling_matrix_present flag for this parameter set
6978 sps->scaling_matrix_present |= is_sps;
6979 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6980 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6981 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6982 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6983 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6984 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only in the SPS or when the PPS enables the 8x8 transform.
6985 if(is_sps || pps->transform_8x8_mode){
6986 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6987 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set NAL unit into a freshly allocated SPS and
 * store it in h->sps_buffers[sps_id] (replacing any previous SPS with the
 * same id). Validates sps_id, POC parameters, reference frame count and
 * picture dimensions; returns negative on error.
 */
6992 static inline int decode_seq_parameter_set(H264Context *h){
6993 MpegEncContext * const s = &h->s;
6994 int profile_idc, level_idc;
6995 unsigned int sps_id;
6999 profile_idc= get_bits(&s->gb, 8);
7000 get_bits1(&s->gb); //constraint_set0_flag
7001 get_bits1(&s->gb); //constraint_set1_flag
7002 get_bits1(&s->gb); //constraint_set2_flag
7003 get_bits1(&s->gb); //constraint_set3_flag
7004 get_bits(&s->gb, 4); // reserved
7005 level_idc= get_bits(&s->gb, 8);
7006 sps_id= get_ue_golomb_31(&s->gb);
7008 if(sps_id >= MAX_SPS_COUNT) {
7009 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7012 sps= av_mallocz(sizeof(SPS));
7016 sps->profile_idc= profile_idc;
7017 sps->level_idc= level_idc;
// Default to flat matrices (all 16) until/unless explicit ones are parsed.
7019 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7020 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7021 sps->scaling_matrix_present = 0;
7023 if(sps->profile_idc >= 100){ //high profile
7024 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7025 if(sps->chroma_format_idc == 3)
7026 get_bits1(&s->gb); //residual_color_transform_flag
7027 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7028 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7029 sps->transform_bypass = get_bits1(&s->gb);
7030 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7032 sps->chroma_format_idc= 1; // non-high profiles imply 4:2:0
7035 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7036 sps->poc_type= get_ue_golomb_31(&s->gb);
// Picture order count: three signalling modes (0, 1, 2).
7038 if(sps->poc_type == 0){ //FIXME #define
7039 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7040 } else if(sps->poc_type == 1){//FIXME #define
7041 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7042 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7043 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7044 sps->poc_cycle_length = get_ue_golomb(&s->gb);
// Bound check before filling the fixed-size offset table.
7046 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7047 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7051 for(i=0; i<sps->poc_cycle_length; i++)
7052 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7053 }else if(sps->poc_type != 2){
7054 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7058 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7059 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7060 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7063 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7064 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7065 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guard against integer overflow in the 16*mb dimension products.
7066 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7067 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7068 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7072 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7073 if(!sps->frame_mbs_only_flag)
7074 sps->mb_aff= get_bits1(&s->gb);
7078 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7080 #ifndef ALLOW_INTERLACE
7082 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7084 sps->crop= get_bits1(&s->gb);
7086 sps->crop_left = get_ue_golomb(&s->gb);
7087 sps->crop_right = get_ue_golomb(&s->gb);
7088 sps->crop_top = get_ue_golomb(&s->gb);
7089 sps->crop_bottom= get_ue_golomb(&s->gb);
7090 if(sps->crop_left || sps->crop_top){
7091 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7093 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7094 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7100 sps->crop_bottom= 0;
7103 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7104 if( sps->vui_parameters_present_flag )
7105 decode_vui_parameters(h, sps);
7107 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7108 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7109 sps_id, sps->profile_idc, sps->level_idc,
7111 sps->ref_frame_count,
7112 sps->mb_width, sps->mb_height,
7113 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7114 sps->direct_8x8_inference_flag ? "8B8" : "",
7115 sps->crop_left, sps->crop_right,
7116 sps->crop_top, sps->crop_bottom,
7117 sps->vui_parameters_present_flag ? "VUI" : "",
7118 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previously stored SPS with the same id.
7121 av_free(h->sps_buffers[sps_id]);
7122 h->sps_buffers[sps_id]= sps;
/* Build the luma-QP -> chroma-QP lookup table for chroma plane t of this
 * PPS, applying the signalled chroma_qp_index_offset ('index') and
 * clipping the offset QP into the valid 0..51 range. */
7130 build_qp_table(PPS *pps, int t, int index)
7133 for(i = 0; i < 52; i++)
7134 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set NAL unit into a freshly allocated PPS and
 * store it in h->pps_buffers[pps_id]. Requires the referenced SPS to have
 * been decoded already; FMO (slice_group_count > 1) is not supported.
 * @param bit_length size of the RBSP in bits, used to detect the optional
 *                   trailing (transform_8x8 / scaling matrix) fields.
 */
7137 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7138 MpegEncContext * const s = &h->s;
7139 unsigned int pps_id= get_ue_golomb(&s->gb);
7142 if(pps_id >= MAX_PPS_COUNT) {
7143 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7147 pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already exist.
7150 pps->sps_id= get_ue_golomb_31(&s->gb);
7151 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7152 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7156 pps->cabac= get_bits1(&s->gb);
7157 pps->pic_order_present= get_bits1(&s->gb);
7158 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7159 if(pps->slice_group_count > 1 ){
7160 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7161 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
// The spec excerpt below documents the unimplemented FMO syntax.
7162 switch(pps->mb_slice_group_map_type){
7165 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7166 | run_length[ i ] |1 |ue(v) |
7171 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7173 | top_left_mb[ i ] |1 |ue(v) |
7174 | bottom_right_mb[ i ] |1 |ue(v) |
7182 | slice_group_change_direction_flag |1 |u(1) |
7183 | slice_group_change_rate_minus1 |1 |ue(v) |
7188 | slice_group_id_cnt_minus1 |1 |ue(v) |
7189 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7191 | slice_group_id[ i ] |1 |u(v) |
7196 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7197 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// At most 32 references per list (fields count individually).
7198 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7199 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7203 pps->weighted_pred= get_bits1(&s->gb);
7204 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7205 pps->init_qp= get_se_golomb(&s->gb) + 26;
7206 pps->init_qs= get_se_golomb(&s->gb) + 26;
7207 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7208 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7209 pps->constrained_intra_pred= get_bits1(&s->gb);
7210 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7212 pps->transform_8x8_mode= 0;
7213 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; the PPS may override them below.
7214 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7215 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// Optional trailing fields are present only if bits remain in the RBSP.
7217 if(get_bits_count(&s->gb) < bit_length){
7218 pps->transform_8x8_mode= get_bits1(&s->gb);
7219 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7220 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7222 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7225 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7226 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7227 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7228 h->pps.chroma_qp_diff= 1;
7230 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7231 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7232 pps_id, pps->sps_id,
7233 pps->cabac ? "CABAC" : "CAVLC",
7234 pps->slice_group_count,
7235 pps->ref_count[0], pps->ref_count[1],
7236 pps->weighted_pred ? "weighted" : "",
7237 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7238 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7239 pps->constrained_intra_pred ? "CONSTR" : "",
7240 pps->redundant_pic_cnt_present ? "REDU" : "",
7241 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previously stored PPS with the same id.
7245 av_free(h->pps_buffers[pps_id]);
7246 h->pps_buffers[pps_id]= pps;
7254 * Call decode_slice() for each context.
7256 * @param h h264 master context
7257 * @param context_count number of contexts to execute
7259 static void execute_decode_slices(H264Context *h, int context_count){
7260 MpegEncContext * const s = &h->s;
7261 AVCodecContext * const avctx= s->avctx;
// Fast path: a single context is decoded directly on the master context,
// avoiding the avctx->execute() thread dispatch below.
7265 if(context_count == 1) {
7266 decode_slice(avctx, &h);
// Threaded path: seed each slave context with the current error-handling
// state before handing the whole context array to avctx->execute().
7268 for(i = 1; i < context_count; i++) {
7269 hx = h->thread_context[i];
7270 hx->s.error_recognition = avctx->error_recognition;
7271 hx->s.error_count = 0;
7274 avctx->execute(avctx, (void *)decode_slice,
7275 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7277 /* pull back stuff from slices to master context */
// The last context holds the final macroblock position / picture state;
// error counts from all slaves are accumulated into the master.
7278 hx = h->thread_context[context_count - 1];
7279 s->mb_x = hx->s.mb_x;
7280 s->mb_y = hx->s.mb_y;
7281 s->dropable = hx->s.dropable;
7282 s->picture_structure = hx->s.picture_structure;
7283 for(i = 1; i < context_count; i++)
7284 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B (start-code
 * delimited) framing; slices are batched across thread contexts and run
 * through execute_decode_slices().
 */
7289 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7290 MpegEncContext * const s = &h->s;
7291 AVCodecContext * const avctx= s->avctx;
7293 H264Context *hx; ///< thread context
7294 int context_count = 0;
7296 h->max_contexts = avctx->thread_count;
// Debug hexdump of the first input bytes (guarded by elided debug condition).
7299 for(i=0; i<50; i++){
7300 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Outside "chunks" mode every call starts a fresh access unit.
7303 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7304 h->current_slice = 0;
7305 if (!s->first_field)
7306 s->current_picture_ptr= NULL;
7318 if(buf_index >= buf_size) break;
// AVC framing: read the h->nal_length_size-byte big-endian NAL length.
7320 for(i = 0; i < h->nal_length_size; i++)
7321 nalsize = (nalsize << 8) | buf[buf_index++];
7322 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7327 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7332 // start code prefix search
7333 for(; buf_index + 3 < buf_size; buf_index++){
7334 // This should always succeed in the first iteration.
7335 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7339 if(buf_index+3 >= buf_size) break;
7344 hx = h->thread_context[context_count];
7346 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7347 if (ptr==NULL || dst_length < 0){
// NOTE(review): operands are in the wrong order — ptr[dst_length - 1] is
// dereferenced BEFORE dst_length > 0 is checked, so dst_length == 0 reads
// ptr[-1]. Should be: while(dst_length > 0 && ptr[dst_length - 1] == 0)
7350 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7352 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7354 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7355 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7358 if (h->is_avc && (nalsize != consumed)){
7359 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7363 buf_index += consumed;
// Skip non-reference NALs when the caller requested frame dropping.
7365 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7366 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7371 switch(hx->nal_unit_type){
// NOTE(review): this error message lacks a trailing '\n', unlike every
// other av_log() call here — fix when editing the full file.
7373 if (h->nal_unit_type != NAL_IDR_SLICE) {
7374 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7377 idr(h); //FIXME ensure we don't lose frames if there is reordering
7379 init_get_bits(&hx->s.gb, ptr, bit_length);
7381 hx->inter_gb_ptr= &hx->s.gb;
7382 hx->s.data_partitioning = 0;
7384 if((err = decode_slice_header(hx, h)))
7387 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Only count this slice for decoding if it is not redundant and not
// excluded by the hurry_up / skip_frame discard policy.
7388 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7389 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7390 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7391 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7392 && avctx->skip_frame < AVDISCARD_ALL)
// Data-partitioned slices: partition A carries the header,
// B/C carry intra and inter residual data in separate bitstreams.
7396 init_get_bits(&hx->s.gb, ptr, bit_length);
7398 hx->inter_gb_ptr= NULL;
7399 hx->s.data_partitioning = 1;
7401 err = decode_slice_header(hx, h);
7404 init_get_bits(&hx->intra_gb, ptr, bit_length);
7405 hx->intra_gb_ptr= &hx->intra_gb;
7408 init_get_bits(&hx->inter_gb, ptr, bit_length);
7409 hx->inter_gb_ptr= &hx->inter_gb;
7411 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7412 && s->context_initialized
7414 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7415 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7416 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7417 && avctx->skip_frame < AVDISCARD_ALL)
7421 init_get_bits(&s->gb, ptr, bit_length);
// Parameter sets (SPS/PPS) are parsed on the master context.
7425 init_get_bits(&s->gb, ptr, bit_length);
7426 decode_seq_parameter_set(h);
7428 if(s->flags& CODEC_FLAG_LOW_DELAY)
7431 if(avctx->has_b_frames < 2)
7432 avctx->has_b_frames= !s->low_delay;
7435 init_get_bits(&s->gb, ptr, bit_length);
7437 decode_picture_parameter_set(h, bit_length);
7441 case NAL_END_SEQUENCE:
7442 case NAL_END_STREAM:
7443 case NAL_FILLER_DATA:
7445 case NAL_AUXILIARY_SLICE:
7448 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush the batch of slices once all thread contexts are filled.
7451 if(context_count == h->max_contexts) {
7452 execute_decode_slices(h, context_count);
7457 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7459 /* Slice could not be decoded in parallel mode, copy down
7460 * NAL unit stuff to context 0 and restart. Note that
7461 * rbsp_buffer is not transferred, but since we no longer
7462 * run in parallel mode this should not be an issue. */
7463 h->nal_unit_type = hx->nal_unit_type;
7464 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any slices still pending at end of buffer.
7470 execute_decode_slices(h, context_count);
7475 * returns the number of bytes consumed for building the current frame
/**
 * Return the number of bytes consumed for building the current frame,
 * clamped to sane values so the caller's parsing loop always advances.
 */
7477 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7478 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7479 if(pos+10>buf_size) pos=buf_size; // within 10 bytes of the end -> consider the whole buffer consumed
/**
 * Decode one input packet: parse its NAL units, then reorder and output
 * a finished frame (or flush buffered frames when buf_size == 0).
 */
7484 static int decode_frame(AVCodecContext *avctx,
7485 void *data, int *data_size,
7486 const uint8_t *buf, int buf_size)
7488 H264Context *h = avctx->priv_data;
7489 MpegEncContext *s = &h->s;
7490 AVFrame *pict = data;
7493 s->flags= avctx->flags;
7494 s->flags2= avctx->flags2;
7496 /* end of stream, output what is still in the buffers */
7497 if (buf_size == 0) {
7501 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC; a zero POC or a
// keyframe terminates the search (new IDR boundary).
7502 out = h->delayed_pic[0];
7504 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7505 if(h->delayed_pic[i]->poc < out->poc){
7506 out = h->delayed_pic[i];
// Shift the remaining delayed pictures down over the output slot.
7510 for(i=out_idx; h->delayed_pic[i]; i++)
7511 h->delayed_pic[i] = h->delayed_pic[i+1];
7514 *data_size = sizeof(AVFrame);
7515 *pict= *(AVFrame*)out;
// First call with AVC extradata: parse the avcC
// (AVCDecoderConfigurationRecord) to extract SPS/PPS sets.
7521 if(h->is_avc && !h->got_avcC) {
7522 int i, cnt, nalsize;
7523 unsigned char *p = avctx->extradata;
7524 if(avctx->extradata_size < 7) {
7525 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7529 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7532 /* sps and pps in the avcC always have length coded with 2 bytes,
7533 so put a fake nal_length_size = 2 while parsing them */
7534 h->nal_length_size = 2;
7535 // Decode sps from avcC
7536 cnt = *(p+5) & 0x1f; // Number of sps
7538 for (i = 0; i < cnt; i++) {
7539 nalsize = AV_RB16(p) + 2;
7540 if(decode_nal_units(h, p, nalsize) < 0) {
7541 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7546 // Decode pps from avcC
// NOTE(review): this loop checks "!= nalsize" while the sps loop above
// checks "< 0" — presumably one of the two is the intended test; confirm
// against decode_nal_units()'s return contract when editing the full file.
7547 cnt = *(p++); // Number of pps
7548 for (i = 0; i < cnt; i++) {
7549 nalsize = AV_RB16(p) + 2;
7550 if(decode_nal_units(h, p, nalsize) != nalsize) {
7551 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7556 // Now store the right NAL length size, that will be used to parse all other NALs
7557 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7558 // Do not reparse avcC
// Annex-B extradata (e.g. from raw .h264): feed it through the normal
// NAL parser once, before the first packet.
7562 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7563 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7568 buf_index=decode_nal_units(h, buf, buf_size);
7572 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7573 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7574 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// A picture is complete: finish it, derive interlacing flags, and run
// the B-frame reordering below.
7578 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7579 Picture *out = s->current_picture_ptr;
7580 Picture *cur = s->current_picture_ptr;
7581 int i, pics, cross_idr, out_of_order, out_idx;
7585 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7586 s->current_picture_ptr->pict_type= s->pict_type;
7589 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
// Save POC/frame_num state for decoding the next picture's POC.
7590 h->prev_poc_msb= h->poc_msb;
7591 h->prev_poc_lsb= h->poc_lsb;
7593 h->prev_frame_num_offset= h->frame_num_offset;
7594 h->prev_frame_num= h->frame_num;
7597 * FIXME: Error handling code does not seem to support interlaced
7598 * when slices span multiple rows
7599 * The ff_er_add_slice calls don't work right for bottom
7600 * fields; they cause massive erroneous error concealing
7601 * Error marking covers both fields (top and bottom).
7602 * This causes a mismatched s->error_count
7603 * and a bad error table. Further, the error count goes to
7604 * INT_MAX when called for bottom field, because mb_y is
7605 * past end by one (callers fault) and resync_mb_y != 0
7606 * causes problems for the first MB line, too.
7613 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7614 /* Wait for second field. */
7618 cur->repeat_pict = 0;
7620 /* Signal interlacing information externally. */
7621 /* Prefer picture timing SEI information, when present, over the decoding process. */
7622 if(h->sps.pic_struct_present_flag){
7623 switch (h->sei_pic_struct)
7625 case SEI_PIC_STRUCT_FRAME:
7626 cur->interlaced_frame = 0;
7628 case SEI_PIC_STRUCT_TOP_FIELD:
7629 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7630 case SEI_PIC_STRUCT_TOP_BOTTOM:
7631 case SEI_PIC_STRUCT_BOTTOM_TOP:
7632 cur->interlaced_frame = 1;
7634 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7635 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7636 // Signal the possibility of telecined film externally (pic_struct 5,6)
7637 // From these hints, let the applications decide if they apply deinterlacing.
7638 cur->repeat_pict = 1;
7639 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7641 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7642 // Force progressive here, as doubling interlaced frame is a bad idea.
7643 cur->interlaced_frame = 0;
7644 cur->repeat_pict = 2;
7646 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7647 cur->interlaced_frame = 0;
7648 cur->repeat_pict = 4;
7652 /* Derive interlacing flag from used decoding process. */
7653 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7656 if (cur->field_poc[0] != cur->field_poc[1]){
7657 /* Derive top_field_first from field pocs. */
7658 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7660 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7661 /* Use picture timing SEI information, even if it is information of a past frame; better than nothing. */
7662 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7663 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7664 cur->top_field_first = 1;
7666 cur->top_field_first = 0;
7668 /* Most likely progressive */
7669 cur->top_field_first = 0;
7673 //FIXME do something with unavailable reference frames
7675 /* Sort B-frames into display order */
7677 if(h->sps.bitstream_restriction_flag
7678 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7679 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions the worst-case reorder depth must be
// assumed under strict compliance.
7683 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7684 && !h->sps.bitstream_restriction_flag){
7685 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7690 while(h->delayed_pic[pics]) pics++;
7692 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Append the current picture to the delayed list, pinning it with
// DELAYED_PIC_REF so it is not recycled before output.
7694 h->delayed_pic[pics++] = cur;
7695 if(cur->reference == 0)
7696 cur->reference = DELAYED_PIC_REF;
// Same smallest-POC selection as the flush path above (see FIXME there).
7698 out = h->delayed_pic[0];
7700 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7701 if(h->delayed_pic[i]->poc < out->poc){
7702 out = h->delayed_pic[i];
7705 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7707 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames adaptively when the stream reorders deeper than the
// currently assumed delay.
7709 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7711 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7713 ((!cross_idr && out->poc > h->outputed_poc + 2)
7714 || cur->pict_type == FF_B_TYPE)))
7717 s->avctx->has_b_frames++;
7720 if(out_of_order || pics > s->avctx->has_b_frames){
7721 out->reference &= ~DELAYED_PIC_REF;
7722 for(i=out_idx; h->delayed_pic[i]; i++)
7723 h->delayed_pic[i] = h->delayed_pic[i+1];
// Only emit the frame when it is in display order and the reorder
// buffer is full enough.
7725 if(!out_of_order && pics > s->avctx->has_b_frames){
7726 *data_size = sizeof(AVFrame);
7728 h->outputed_poc = out->poc;
7729 *pict= *(AVFrame*)out;
7731 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7736 assert(pict->data[0] || !*data_size);
7737 ff_print_debug_info(s, pict);
7738 //printf("out %d\n", (int)pict->data[0]);
7741 /* Return the Picture timestamp as the frame number */
7742 /* we subtract 1 because it is added on utils.c */
7743 avctx->frame_number = s->picture_number - 1;
7745 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbors (same slice => available): [0]=top-left, [1]=top, [2]=top-right,
 * [3]=left; [4]/[5] are constant placeholders.
 */
7748 static inline void fill_mb_avail(H264Context *h){
7749 MpegEncContext * const s = &h->s;
7750 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// NOTE(review): the mb_xy - s->mb_stride accesses below assume either
// s->mb_y > 0 or slice_table padding above row 0 — the guarding code is
// in the elided lines; confirm against the full file.
7753 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7754 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7755 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7761 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7762 h->mb_avail[4]= 1; //FIXME move out
7763 h->mb_avail[5]= 0; //FIXME move out
// Built-in self-test (compiled under the TEST configuration): exercises
// Exp-Golomb coding, the 4x4 (I)DCT round trip, the quantizer, and the
// NAL escaping/unescaping layer with random data.
7771 #define SIZE (COUNT*40)
7777 // int int_temp[10000];
7779 AVCodecContext avctx;
7781 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT codes, then read them back ---
7783 init_put_bits(&pb, temp, SIZE);
7784 printf("testing unsigned exp golomb\n");
7785 for(i=0; i<COUNT; i++){
7787 set_ue_golomb(&pb, i);
7788 STOP_TIMER("set_ue_golomb");
7790 flush_put_bits(&pb);
7792 init_get_bits(&gb, temp, 8*SIZE);
7793 for(i=0; i<COUNT; i++){
7796 s= show_bits(&gb, 24);
7799 j= get_ue_golomb(&gb);
7801 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7804 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb: symmetric values around zero ---
7808 init_put_bits(&pb, temp, SIZE);
7809 printf("testing signed exp golomb\n");
7810 for(i=0; i<COUNT; i++){
7812 set_se_golomb(&pb, i - COUNT/2);
7813 STOP_TIMER("set_se_golomb");
7815 flush_put_bits(&pb);
7817 init_get_bits(&gb, temp, 8*SIZE);
7818 for(i=0; i<COUNT; i++){
7821 s= show_bits(&gb, 24);
7824 j= get_se_golomb(&gb);
7825 if(j != i - COUNT/2){
7826 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7829 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: random blocks, forward transform, coarse
// dequantization, inverse transform, then error accumulation ---
7833 printf("testing 4x4 (I)DCT\n");
7836 uint8_t src[16], ref[16];
7837 uint64_t error= 0, max_error=0;
7839 for(i=0; i<COUNT; i++){
7841 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7842 for(j=0; j<16; j++){
7843 ref[j]= random()%255;
7844 src[j]= random()%255;
7847 h264_diff_dct_c(block, src, ref, 4);
// scale coefficients to compensate the transform's built-in gains
7850 for(j=0; j<16; j++){
7851 // printf("%d ", block[j]);
7852 block[j]= block[j]*4;
7853 if(j&1) block[j]= (block[j]*4 + 2)/5;
7854 if(j&4) block[j]= (block[j]*4 + 2)/5;
7858 s->dsp.h264_idct_add(ref, block, 4);
7859 /* for(j=0; j<16; j++){
7860 printf("%d ", ref[j]);
7864 for(j=0; j<16; j++){
7865 int diff= FFABS(src[j] - ref[j]);
7868 max_error= FFMAX(max_error, diff);
7871 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer: random block per QP value ---
7872 printf("testing quantizer\n");
7873 for(qp=0; qp<52; qp++){
7875 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: random bitstream with injected zero bytes must survive
// an encode_nal()/decode_nal() round trip byte-for-byte ---
7878 printf("Testing NAL layer\n");
7880 uint8_t bitstream[COUNT];
7881 uint8_t nal[COUNT*2];
7883 memset(&h, 0, sizeof(H264Context));
7885 for(i=0; i<COUNT; i++){
7893 for(j=0; j<COUNT; j++){
7894 bitstream[j]= (random() % 255) + 1;
7897 for(j=0; j<zeros; j++){
7898 int pos= random() % COUNT;
7899 while(bitstream[pos] == 0){
7908 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7910 printf("encoding failed\n");
7914 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7918 if(out_length != COUNT){
7919 printf("incorrect length %d %d\n", out_length, COUNT);
// NOTE(review): argument order here (expected, actual) is the reverse of
// the "incorrect length" message above (actual, expected).
7923 if(consumed != nal_length){
7924 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7928 if(memcmp(bitstream, out, COUNT)){
7929 printf("mismatch\n");
7935 printf("Testing RBSP\n");
/**
 * Codec close callback: free the per-context RBSP buffers, the decoder
 * tables, and all cached SPS/PPS parameter sets.
 */
7943 static av_cold int decode_end(AVCodecContext *avctx)
7945 H264Context *h = avctx->priv_data;
7946 MpegEncContext *s = &h->s;
7949 av_freep(&h->rbsp_buffer[0]);
7950 av_freep(&h->rbsp_buffer[1]);
7951 free_tables(h); //FIXME cleanup init stuff perhaps
// av_freep() NULLs each slot, so stale SPS/PPS pointers cannot dangle.
7953 for(i = 0; i < MAX_SPS_COUNT; i++)
7954 av_freep(h->sps_buffers + i);
7956 for(i = 0; i < MAX_PPS_COUNT; i++)
7957 av_freep(h->pps_buffers + i);
7961 // memset(h, 0, sizeof(H264Context));
7967 AVCodec h264_decoder = {
7971 sizeof(H264Context),
7976 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7978 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),