/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
41 #include "x86/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs; one table per nC context group, sizes below. */
static VLC coeff_token_vlc[4];
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};

/* coeff_token VLC for chroma DC blocks. */
static VLC chroma_dc_coeff_token_vlc;
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
static const int chroma_dc_coeff_token_vlc_table_size = 256;

/* total_zeros VLCs, one per total_coeff value (1..15). */
static VLC total_zeros_vlc[15];
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
static const int total_zeros_vlc_tables_size = 512;

/* total_zeros VLCs for chroma DC, one per total_coeff value (1..3). */
static VLC chroma_dc_total_zeros_vlc[3];
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
static const int chroma_dc_total_zeros_vlc_tables_size = 8;

/* run_before VLCs, one per zeros_left value (1..6). */
static VLC run_vlc[6];
static VLC_TYPE run_vlc_tables[6][8][2];
static const int run_vlc_tables_size = 8;

/* run_before VLC table used when zeros_left > 6. */
static VLC_TYPE run7_vlc_table[96][2];
static const int run7_vlc_table_size = 96;
/* Forward declarations: SVQ3 variants of dequant/idct, the per-macroblock
 * deblocking filters (exact and fast paths), and long-term ref removal. */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static Picture * remove_long(H264Context *h, int i, int ref_mask);
83 static av_always_inline uint32_t pack16to32(int a, int b){
84 #ifdef WORDS_BIGENDIAN
85 return (b&0xFFFF) + (a<<16);
87 return (a&0xFFFF) + (b<<16);
/* rem6[qp] = qp % 6 for 0 <= qp < 52 (QP period within a dequant octave) */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5,  0, 1, 2, 3, 4, 5,  0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,  0, 1, 2, 3, 4, 5,  0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,  0, 1, 2, 3, 4, 5,  0, 1, 2, 3,
};
/* div6[qp] = qp / 6 for 0 <= qp < 52 (dequant shift/octave for a QP) */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0,  1, 1, 1, 1, 1, 1,  2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,  4, 4, 4, 4, 4, 4,  5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,  7, 7, 7, 7, 7, 7,  8, 8, 8, 8,
};
/* Per-MBAFF-case mapping of the left neighbour's 4x4 sub-block indices
 * (first 4 entries: luma rows, last 4: chroma) used by fill_caches().
 * Row 0: same-parity neighbours; rows 1-3: the frame/field mixed cases
 * selected via left_block_options[3] and left_block_options[2 - bottom].
 * NOTE(review): rows restored from the canonical upstream table — the
 * values were lost from this copy; verify against the reference source. */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
/* Number of bitstream bits inspected at once by the CAVLC level decoder. */
#define LEVEL_TAB_BITS 8
/* Indexed by [suffix-length context][next LEVEL_TAB_BITS bitstream bits];
 * the two int8_t entries are presumably {decoded value, bits consumed} —
 * TODO confirm against the CAVLC level decoder that fills this table. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fill the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero coefficient counts, mv/ref/mvd caches and the B-direct cache)
 * from the frame-wide tables, selecting the correct neighbour macroblocks
 * for PAFF/MBAFF field-vs-frame coding.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when filling for the loop filter, which uses
 *                    relaxed neighbour-availability rules (already-decoded
 *                    slice instead of same-slice)
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;

    top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    /* default (non-MBAFF / same-parity) neighbour positions */
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];

    /* MBAFF: re-derive neighbour MB addresses from the MB-pair grid,
     * depending on the field/frame flag of this MB and of each
     * neighbouring pair */
    const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
    const int top_pair_xy = pair_xy - s->mb_stride;
    const int topleft_pair_xy = top_pair_xy - 1;
    const int topright_pair_xy = top_pair_xy + 1;
    const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
    const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
    const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
    const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
    const int curr_mb_field_flag = IS_INTERLACED(mb_type);
    const int bottom = (s->mb_y & 1);
    tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

    if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
        top_xy -= s->mb_stride;
    if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
        topleft_xy -= s->mb_stride;
    } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
        topleft_xy += s->mb_stride;
        // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
        topleft_partition = 0;
    if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
        topright_xy -= s->mb_stride;
    if (left_mb_field_flag != curr_mb_field_flag) {
        left_xy[1] = left_xy[0] = pair_xy - 1;
        if (curr_mb_field_flag) {
            left_xy[1] += s->mb_stride;
            left_block = left_block_options[3];
            left_block= left_block_options[2 - bottom];

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

    /* deblock path: a neighbour is usable if it belongs to any
     * already-decoded slice (slice_table entry valid, i.e. < 0xFFFF) */
    top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
    left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(MB_MBAFF && !IS_INTRA(mb_type)){
        for(list=0; list<h->list_count; list++){
            //These values where changed for ease of performing MC, we need to change them back
            //FIXME maybe we can make MC and loop filter use the same values or prevent
            //the MC code from changing ref_cache and rather use a temporary array.
            if(USES_LIST(mb_type,list)){
                int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;

    /* decode path: neighbours must belong to the same slice */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* with constrained_intra_pred only intra-coded neighbours may be
         * used for prediction, otherwise any decoded neighbour qualifies */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        /* clear the per-4x4-block availability bits of missing neighbours */
        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        /* intra4x4 prediction mode cache: import the neighbouring MBs'
         * stored modes, or a default when the neighbour is unusable */
        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!(top_type & type_mask))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!(left_type[i] & type_mask))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

    /* non-zero-count cache: top row and left column come from the
     * neighbouring macroblocks' stored counts */
    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

        /* missing neighbour: 0 for CABAC inter (counts as no residual),
         * magic value 64 otherwise so the CAVLC nnz predictor can tell */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 + 8*i]=
            h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* CABAC: cache the neighbours' coded block patterns */
        h->top_cbp = h->cbp_table[top_xy];
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
    } else if(IS_INTRA(mb_type)) {
        h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                h->mv_cache_clean[list]= 1;
                }*/
            h->mv_cache_clean[list]= 0;

            /* top row of the mv/ref caches */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

            /* left column, one 8-pixel half (i=0 top, i=1 bottom) at a time */
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))

            /* topleft/topright corners of the mv/ref caches */
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            /* positions in the 8x5 cache that never have real neighbours */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            /* CABAC only: load the neighbours' motion vector differences */
            /* XXX beurk, Load mvd */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
            if(USES_LIST(left_type[0], list)){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
            if(USES_LIST(left_type[1], list)){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
            *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

            /* B slices: cache which neighbouring 8x8 blocks used direct mode */
            if(h->slice_type_nos == FF_B_TYPE){
                fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                if(IS_DIRECT(top_type)){
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                }else if(IS_8X8(top_type)){
                    int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                    h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                    h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                if(IS_DIRECT(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                else if(IS_8X8(left_type[0]))
                    h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                if(IS_DIRECT(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                else if(IS_8X8(left_type[1]))
                    h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            /* MBAFF frame<->field mv/ref rescaling: MAP_F2F is applied to
             * every cached neighbour position listed below */
            MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
            MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
            MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
            MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
            MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
            MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

            /* frame MB reading a field neighbour: halve vertical components */
            #define MAP_F2F(idx, mb_type)\
                if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] <<= 1;\
                    h->mv_cache[list][idx][1] /= 2;\
                    h->mvd_cache[list][idx][1] /= 2;\
            #define MAP_F2F(idx, mb_type)\
                if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                    h->ref_cache[list][idx] >>= 1;\
                    h->mv_cache[list][idx][1] <<= 1;\
                    h->mvd_cache[list][idx][1] <<= 1;\
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * Check if the top & left blocks are available if needed & change the dc
 * mode so it only uses the available blocks; modes that strictly require
 * a missing neighbour are reported with av_log.
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* remap tables indexed by the requested mode: -1 = neighbour strictly
     * required (error), 0 = mode stays valid, else the substitute DC mode */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

    /* remap the four modes of the top row when the row above is missing */
    if(!(h->top_samples_available&0x8000)){
        int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
        h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

    /* remap the left-column modes for each missing left 4x4 block */
    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        if(!(h->left_samples_available&mask[i])){
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
            h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
} //FIXME cleanup like next
/**
 * Check if the top & left blocks are available if needed & change the dc
 * mode so it only uses the available blocks (chroma / 16x16 variant).
 * @param mode the requested prediction mode
 */
static inline int check_intra_pred_mode(H264Context *h, int mode){
    MpegEncContext * const s = &h->s;
    /* remap tables, same -1/keep/substitute convention as the 4x4 check */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

    if(!(h->top_samples_available&0x8000)){
        av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

    if((h->left_samples_available&0x8080) != 0x8080){
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
635 * gets the predicted intra4x4 prediction mode.
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
670 * gets the predicted number of non-zero coefficients.
671 * @param n block index
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetch the motion vector of the diagonal neighbour (top-right, falling
 * back to top-left) of block i for MV prediction, handling MBAFF
 * frame/field vector rescaling through SET_DIAG_MV.
 * @param C receives a pointer to the chosen neighbour's MV
 * @return the reference index of the chosen neighbour
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    /* default the scratch cache slot to a zero MV */
    *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
    *C = h->mv_cache[list][scan8[0]-2];

    && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
    int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
    if(IS_INTERLACED(mb_types[topright_xy])){
/* Read the MV at 4x4-block position (X4,Y4) from the current picture,
 * rescale its vertical component with MV_OP (field<->frame), stash it in
 * the scratch cache slot and return the ref index adjusted by REF_OP. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
    const int x4 = X4, y4 = Y4;\
    const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
    if(!USES_LIST(mb_type,list))\
        return LIST_NOT_USED;\
    mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
    h->mv_cache[list][scan8[0]-2][0] = mv[0];\
    h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
    return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

    SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

    if(topright_ref == PART_NOT_AVAILABLE
    && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
    && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
        && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
        SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
        && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
        // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
        SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        /* top-right unavailable: fall back to the top-left neighbour */
        tprintf(s->avctx, "topright MV not available\n");
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
/**
 * Get the predicted MV: the component-wise median of the left, top and
 * diagonal neighbour MVs, with the H.264 single-matching-reference and
 * left-only special cases.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
    const int index8= scan8[n];
    const int top_ref= h->ref_cache[list][ index8 - 8 ];
    const int left_ref= h->ref_cache[list][ index8 - 1 ];
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    int diagonal_ref, match_count;

    assert(part_width==1 || part_width==2 || part_width==4);

    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
    /* count how many neighbours use the same reference as this block */
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    if(match_count > 1){ //most common
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        /* exactly one neighbour shares the reference: take its MV directly */
    }else if(top_ref==ref){
    /* no matching reference: median, unless only the left MB exists */
    if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
        *mx= mid_pred(A[0], B[0], C[0]);
        *my= mid_pred(A[1], B[1], C[1]);

    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * Get the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        /* top partition: prefer the MV from the macroblock above */
        const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);

        /* bottom partition: prefer the MV from the macroblock on the left */
        const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];

        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

    //RARE: fall back to the generic median prediction
    pred_motion(h, n, 4, list, ref, mx, my);
/**
 * Get the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        /* left partition: prefer the MV of the left neighbour */
        const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        /* right partition: prefer the diagonal (top-right) neighbour */
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){
    //RARE: fall back to the generic median prediction
    pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Get the predicted MV for a P-skip macroblock: (0,0) when the top or
 * left neighbour is missing, or when either has reference 0 with a zero
 * MV; otherwise the regular 16x16 median prediction.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* ref|mv is zero only for "reference 0 with zero MV" */
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
    || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
    || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){

    pred_motion(h, 0, 4, 0, 0, mx, my);
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Precompute the temporal-direct distance scale factors for every list0
 * reference, plus per-field variants for MBAFF.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;

    /* per-field factors; i+16 addresses the field reference entries */
    for(field=0; field<2; field++){
        const int poc = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Build the map from the co-located (list1[0]) picture's reference
 * indices to the current slice's reference list, used by temporal direct
 * mode.
 * @param field    parity of the current field being mapped
 * @param colfield parity of the co-located picture's reference set
 * @param mbafi    nonzero when building the extra MBAFF field maps
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            /* find the current-list entry with the same 4*frame_num +
             * reference-parity key and record the mapping */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                        map[list][old_ref] = cur_ref;
/**
 * Per-slice initialization for direct-mode prediction: record packed
 * reference POCs/counts on the current picture and build the
 * map_col_to_list0 tables (frame and per-field variants).
 */
952 static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
// select the field slot: (structure&1)^1 maps field parity to 0/1
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
960 for(list=0; list<2; list++){
961 cur->ref_count[sidx][list] = h->ref_count[list];
962 for(j=0; j<h->ref_count[list]; j++)
// pack frame_num with the 2-bit reference flags; fill_colmap()
// matches against exactly this encoding
963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// frame pictures: duplicate into the second field slot
966 if(s->picture_structure == PICT_FRAME){
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
971 cur->mbaff= FRAME_MBAFF;
// only B slices using temporal direct need the colmap tables
973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
976 for(list=0; list<2; list++){
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Derive motion vectors and reference indices for B_DIRECT / direct
 * sub-partitions (spatial or temporal direct prediction) and fill
 * h->mv_cache / h->ref_cache accordingly; *mb_type is updated with the
 * derived partition flags.
 * NOTE(review): this excerpt is heavily elided — many braces, else
 * branches and declarations (ref[], mv[][], mb_type_col[], ref_offset,
 * etc.) are on lines not visible here.
 */
983 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
991 const int is_b8x8 = IS_8X8(*mb_type);
992 unsigned int sub_mb_type;
995 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* ---- locate the colocated macroblock, handling all frame/field
 *      combinations between the current MB and ref_list[1][0] ---- */
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
// pick the colocated field closest in POC to the current picture
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
// step one MB row up/down to reach the opposite-parity field
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// one field MB covers a frame MB pair: take the top of the pair
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }else{ // AFR/FR -> AFR/FR
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
/* choose the direct partition size from the colocated MB type */
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* pointers into the colocated picture's motion/ref tables */
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// advance to the bottom half of the colocated MB pair
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
/* =================== spatial direct prediction =================== */
1058 if(h->direct_spatial_mv_pred){
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1070 if(refc == PART_NOT_AVAILABLE)
1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// unsigned compare makes negative (unavailable) refs largest
1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// no valid neighbor ref in either list: zero everything
1077 if(ref[0] < 0 && ref[1] < 0){
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1082 for(list=0; list<2; list++){
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 mv[list][0] = mv[list][1] = 0;
// a list with no usable reference is switched off in the mb type
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
1094 }else if(ref[0] < 0){
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
/* mixed frame/field with the colocated MB: per-8x8 handling */
1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1101 for(i8=0; i8<4; i8++){
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 h->sub_mb_type[i8] = sub_mb_type;
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// colocated block is "stationary" (ref 0, |mv| <= 1): force mv 0
1114 if(!IS_INTRA(mb_type_col[y8])
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128 }else if(IS_16X16(*mb_type)){
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
// same stationary-colocated test at whole-MB granularity;
// the x264_build check works around an old x264 bug
1133 if(!IS_INTRA(mb_type_col[0])
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1136 && (h->x264_build>33 || !h->x264_build)))){
1138 a= pack16to32(mv[0][0],mv[0][1]);
1140 b= pack16to32(mv[1][0],mv[1][1]);
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* general per-8x8 spatial direct */
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 h->sub_mb_type[i8] = sub_mb_type;
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// stationary colocated 8x8: zero the mvs (8x8 or per-4x4)
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1164 && (h->x264_build>33 || !h->x264_build)))){
1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1166 if(IS_SUB_8X8(sub_mb_type)){
1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 for(i4=0; i4<4; i4++){
1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* =================== temporal direct prediction ================== */
1187 }else{ /* direct temporal mv pred */
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
// MBAFF field MBs use the per-field map/scale tables
1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* frame/field mismatch: scale mvs with a vertical shift per 8x8 */
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1208 const int16_t (*l1mv)[2]= l1mv0;
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 h->sub_mb_type[i8] = sub_mb_type;
// list-1 ref of a temporal-direct block is always index 0
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
// map the colocated reference into the current list 0
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
// adjust the vertical mv for the frame<->field unit change
1234 int my_col = (mv_col[1]<<y_shift)/2;
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
// list1 mv = list0 mv - colocated mv (temporal direct relation)
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 /* one-to-one mv scaling */
1246 if(IS_16X16(*mb_type)){
1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1250 if(IS_INTRA(mb_type_col[0])){
// prefer the colocated list-0 ref; fall back to its list 1
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1255 const int scale = dist_scale_factor[ref0];
1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* per-8x8 (and per-4x4) one-to-one temporal scaling */
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
1272 const int16_t (*l1mv)[2]= l1mv0;
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 h->sub_mb_type[i8] = sub_mb_type;
1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col[0])){
1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287 ref0 = map_col_to_list0[0][ref0];
1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1292 scale = dist_scale_factor[ref0];
1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1295 if(IS_SUB_8X8(sub_mb_type)){
1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copy the per-MB mv/ref caches (mv_cache, ref_cache, mvd_cache,
 * sub_mb_type) back into the frame-wide tables
 * (motion_val, ref_index, mvd_table, direct_table).
 */
1315 static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
// b_xy/b8_xy: this MB's origin in 4x4- and 8x8-block units
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1321 if(!USES_LIST(mb_type, 0))
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324 for(list=0; list<h->list_count; list++){
1326 if(!USES_LIST(mb_type, list))
// copy 4 mvs (2 x uint64) per row from the cache to the picture
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the mv differences for context modelling
1333 if( h->pps.cabac ) {
1334 if(IS_SKIP(mb_type))
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// one reference index per 8x8 block
1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B slices with CABAC: record which 8x8 blocks used direct mode
1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1353 if(IS_8X8(mb_type)){
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1363 * Decodes a network abstraction layer unit.
1364 * @param consumed is the number of bytes used as input
1365 * @param length is the length of the array
1366 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1367 * @returns decoded bytes, might be src+1 if no escapes
1369 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
/* parse the one-byte NAL header */
1374 // src[0]&0x80; //forbidden bit
1375 h->nal_ref_idc= src[0]>>5;
1376 h->nal_unit_type= src[0]&0x1F;
// debug dump of the raw payload (enclosing #ifdef not visible here)
1380 for(i=0; i<length; i++)
1381 printf("%2X ", src[i]);
/* fast scan for a 00 00 xx pattern: the bit tricks detect any zero
 * byte in a word; stride 9/5 still cannot skip past a 3-byte code */
1384 #if HAVE_FAST_UNALIGNED
1385 # if HAVE_FAST_64BIT
1387 for(i=0; i+1<length; i+=9){
1388 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1391 for(i=0; i+1<length; i+=5){
1392 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
// back up one byte so the slow loop below sees the first zero
1395 if(i>0 && !src[i]) i--;
/* byte-wise scan for 00 00 (01|02|03) */
1399 for(i=0; i+1<length; i+=2){
1400 if(src[i]) continue;
1401 if(i>0 && src[i-1]==0) i--;
1403 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1405 /* startcode, so we must be past the end */
// no emulation-prevention bytes found: return the input unmodified
1413 if(i>=length-1){ //no escaped 0
1414 *dst_length= length;
1415 *consumed= length+1; //+1 for the header
1419 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1420 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1421 dst= h->rbsp_buffer[bufidx];
1427 //printf("decoding esc\n");
// copy the escape-free prefix verbatim
1428 memcpy(dst, src, i);
1431 //remove escapes (very rare 1:2^22)
1433 dst[di++]= src[si++];
1434 dst[di++]= src[si++];
// 00 00 03 xx: drop the 03 emulation-prevention byte
1435 }else if(src[si]==0 && src[si+1]==0){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1448 dst[di++]= src[si++];
// zero the padding so downstream bit readers never over-read garbage
1451 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1454 *consumed= si + 1;//+1 for the header
1455 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1460 * identifies the exact end of the bitstream
1461 * @return the length of the rbsp trailing bits, or 0 if damaged
/**
 * Locate the rbsp_stop_one_bit to determine the exact end of the
 * bitstream. NOTE(review): the body is almost entirely outside this
 * excerpt; only the trace call is visible.
 */
1463 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1477 * IDCT transforms the 16 dc values and dequantizes them.
1478 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform + dequantization of the 16 luma DC
 * coefficients, which sit sparsely inside *block at the given offsets.
 * @param qmul dequant multiplier applied after the transform
 */
1480 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1483 int temp[16]; //FIXME check if this is a good idea
// offsets of the 16 DC coefficients within the interleaved block layout
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1487 //memset(block, 64, 2*256);
/* first pass: butterflies along one axis into temp[] */
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterflies along the other axis, then dequantize
 * with rounding ((x*qmul + 128) >> 8) back into block */
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1518 * DCT transforms the 16 dc values.
1519 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); results are scaled
 * by >>1 and written back in place.
 */
1521 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1522 // const int qmul= dequant_coeff[qp][0];
1524 int temp[16]; //FIXME check if this is a good idea
// same sparse DC layout as the inverse transform above
1525 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1526 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: butterflies into temp[] */
1529 const int offset= y_offset[i];
1530 const int z0= block[offset+stride*0] + block[offset+stride*4];
1531 const int z1= block[offset+stride*0] - block[offset+stride*4];
1532 const int z2= block[offset+stride*1] - block[offset+stride*5];
1533 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: butterflies, halved, written back */
1542 const int offset= x_offset[i];
1543 const int z0= temp[4*0+i] + temp[4*2+i];
1544 const int z1= temp[4*0+i] - temp[4*2+i];
1545 const int z2= temp[4*1+i] - temp[4*3+i];
1546 const int z3= temp[4*1+i] + temp[4*3+i];
1548 block[stride*0 +offset]= (z0 + z3)>>1;
1549 block[stride*2 +offset]= (z1 + z2)>>1;
1550 block[stride*8 +offset]= (z1 - z2)>>1;
1551 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 Hadamard transform + dequantization of the 4 chroma DC
 * coefficients, in place.
 * @param qmul dequant multiplier ( >>7 scaling after the transform )
 */
1559 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1560 const int stride= 16*2;
1561 const int xStride= 16;
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
// 2x2 butterfly (e/f computed on lines not visible in this excerpt),
// then dequantize with >>7
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values, in place
 * (encoder counterpart of chroma_dc_dequant_idct_c; no scaling).
 */
1581 static void chroma_dc_dct_c(DCTELEM *block){
1582 const int stride= 16*2;
1583 const int xStride= 16;
1586 a= block[stride*0 + xStride*0];
1587 b= block[stride*0 + xStride*1];
1588 c= block[stride*1 + xStride*0];
1589 d= block[stride*1 + xStride*1];
// 2x2 butterfly (e/f computed on lines not visible in this excerpt)
1596 block[stride*0 + xStride*0]= (a+c);
1597 block[stride*0 + xStride*1]= (e+b);
1598 block[stride*1 + xStride*0]= (a-c);
1599 block[stride*1 + xStride*1]= (e-b);
1604 * gets the chroma qp.
/**
 * Map a luma qscale to the chroma QP via the PPS table.
 * @param t selects which of the two per-plane tables to use
 */
1606 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1607 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensate one partition from one reference (one direction):
 * quarter-pel luma via qpix_op, eighth-pel chroma via chroma_op, with
 * edge emulation when the mv points outside the padded picture.
 */
1610 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1611 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1612 int src_x_offset, int src_y_offset,
1613 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1614 MpegEncContext * const s = &h->s;
// mv in quarter-pel units, offset to this partition's position
1615 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1616 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// fractional part selects one of the 16 qpel interpolation functions
1617 const int luma_xy= (mx&3) + ((my&3)<<2);
1618 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1619 uint8_t * src_cb, * src_cr;
1620 int extra_width= h->emu_edge_width;
1621 int extra_height= h->emu_edge_height;
1623 const int full_mx= mx>>2;
1624 const int full_my= my>>2;
1625 const int pic_width = 16*s->mb_width;
1626 const int pic_height = 16*s->mb_height >> MB_FIELD;
// subpel interpolation reads 3 extra pels on each side
1628 if(mx&7) extra_width -= 3;
1629 if(my&7) extra_height -= 3;
// source area leaves the padded picture: interpolate from a copy
1631 if( full_mx < 0-extra_width
1632 || full_my < 0-extra_height
1633 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1634 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1635 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1636 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1640 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// non-square partitions need a second call offset by delta
1642 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1645 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1648 // chroma offset when predicting from a field of opposite parity
1649 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1650 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1652 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1656 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1657 src_cb= s->edge_emu_buffer;
1659 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1662 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1663 src_cr= s->edge_emu_buffer;
1665 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation for one partition: "put" the first
 * prediction, then "avg" the second on top when bidirectional.
 */
1668 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
// move the destination pointers to this partition
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
// make the offsets absolute within the picture
1681 x_offset += 8*s->mb_x;
1682 y_offset += 8*(s->mb_y >> MB_FIELD);
1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
// switch to averaging so the list-1 prediction blends with list 0
1691 chroma_op= chroma_avg;
1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
/**
 * Weighted motion compensation for one partition.
 * Bi-directional: predict both directions into separate buffers, then
 * combine with implicit (use_weight==2) or explicit biweights.
 * Uni-directional: predict, then apply explicit luma/chroma weights.
 */
1702 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1703 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1704 int x_offset, int y_offset,
1705 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1706 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1707 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1708 int list0, int list1){
1709 MpegEncContext * const s = &h->s;
// position dest pointers and absolute offsets for this partition
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1718 /* don't optimize for luma-only case, since B-frames usually
1719 * use implicit weights => chroma too. */
// scratch buffers for the second (list-1) prediction
1720 uint8_t *tmp_cb = s->obmc_scratchpad;
1721 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1722 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1723 int refn0 = h->ref_cache[0][ scan8[n] ];
1724 int refn1 = h->ref_cache[1][ scan8[n] ];
1726 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1727 dest_y, dest_cb, dest_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
1729 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1730 tmp_y, tmp_cb, tmp_cr,
1731 x_offset, y_offset, qpix_put, chroma_put);
// implicit weighting: weights sum to 64, derived from POC distances
1733 if(h->use_weight == 2){
1734 int weight0 = h->implicit_weight[refn0][refn1];
1735 int weight1 = 64 - weight0;
1736 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// explicit bi-directional weighting from the slice header tables
1740 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1742 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1743 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1745 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1746 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1748 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// uni-directional: predict then weight in place
1751 int list = list1 ? 1 : 0;
1752 int refn = h->ref_cache[list][ scan8[n] ];
1753 Picture *ref= &h->ref_list[list][refn];
1754 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1755 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1756 qpix_put, chroma_put);
1758 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1759 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1760 if(h->use_weight_chroma){
1761 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1763 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1764 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatch one partition to weighted or standard motion compensation.
 * Weighted is used for explicit weights (use_weight==1) and for
 * implicit weights that are not a plain 32/32 average.
 */
1769 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771 int x_offset, int y_offset,
1772 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1774 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1775 int list0, int list1){
// implicit weight of exactly 32 degenerates to a plain average,
// so the cheaper std path handles it
1776 if((h->use_weight==2 && list0 && list1
1777 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778 || h->use_weight==1)
1779 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780 x_offset, y_offset, qpix_put, chroma_put,
1781 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1783 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1787 static inline void prefetch_motion(H264Context *h, int list){
1788 /* fetch pixels for estimated mv 4 macroblocks ahead
1789 * optimized for 64byte cache lines */
1790 MpegEncContext * const s = &h->s;
1791 const int refn = h->ref_cache[list][scan8[0]];
// approximate source position: current mv plus 4 MBs to the right
1793 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795 uint8_t **src= h->ref_list[list][refn].data;
1796 off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1797 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// chroma: half resolution; src[2]-src[1] reaches the cr plane
1798 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Perform inter motion compensation for a whole macroblock,
 * dispatching to mc_part() per partition according to the MB type
 * (16x16, 16x8, 8x16, or 8x8 with sub-partitions).
 */
1803 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1805 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1807 MpegEncContext * const s = &h->s;
1808 const int mb_xy= h->mb_xy;
1809 const int mb_type= s->current_picture.mb_type[mb_xy];
1811 assert(IS_INTER(mb_type));
// start the list-0 prefetch early; list 1 is prefetched at the end
1813 prefetch_motion(h, 0);
1815 if(IS_16X16(mb_type)){
1816 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1818 &weight_op[0], &weight_avg[0],
1819 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1820 }else if(IS_16X8(mb_type)){
1821 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1823 &weight_op[1], &weight_avg[1],
1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1827 &weight_op[1], &weight_avg[1],
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 }else if(IS_8X16(mb_type)){
1830 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[2], &weight_avg[2],
1833 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1834 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1835 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1836 &weight_op[2], &weight_avg[2],
1837 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1841 assert(IS_8X8(mb_type));
// per-8x8 loop; each 8x8 may be further split by sub_mb_type
1844 const int sub_mb_type= h->sub_mb_type[i];
1846 int x_offset= (i&1)<<2;
1847 int y_offset= (i&2)<<1;
1849 if(IS_SUB_8X8(sub_mb_type)){
1850 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1852 &weight_op[3], &weight_avg[3],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_8X4(sub_mb_type)){
1855 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1857 &weight_op[4], &weight_avg[4],
1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1861 &weight_op[4], &weight_avg[4],
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else if(IS_SUB_4X8(sub_mb_type)){
1864 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1865 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1866 &weight_op[5], &weight_avg[5],
1867 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1868 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[5], &weight_avg[5],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1874 assert(IS_SUB_4X4(sub_mb_type));
1876 int sub_x_offset= x_offset + 2*(j&1);
1877 int sub_y_offset= y_offset + (j&2);
1878 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1880 &weight_op[6], &weight_avg[6],
1881 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887 prefetch_motion(h, 1);
/**
 * Build the cavlc_level_tab lookup tables used to decode CAVLC level
 * codes: for each suffix length and each LEVEL_TAB_BITS-bit prefix of
 * the bitstream, store the decoded level and the number of bits
 * consumed (or a prefix+100 / LEVEL_TAB_BITS+100 escape marker when
 * the code does not fit in the table).
 */
1890 static av_cold void init_cavlc_level_tab(void){
1891 int suffix_length, mask;
1894 for(suffix_length=0; suffix_length<7; suffix_length++){
1895 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// prefix = number of leading zero bits in the i-bit pattern
1896 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// map the unsigned code to a signed level (zig-zag)
1899 mask= -(level_code&1);
1900 level_code= (((2+level_code)>>1) ^ mask) - mask;
1901 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902 cavlc_level_tab[suffix_length][i][0]= level_code;
1903 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
// code longer than the table width: store escape markers so the
// decoder falls back to bit-by-bit parsing
1904 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1905 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1908 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/**
 * One-time initialization of all static CAVLC VLC tables
 * (coeff_token, total_zeros, run, and their chroma-DC variants).
 * Every VLC points into a statically preallocated table and is built
 * with INIT_VLC_USE_NEW_STATIC, so no heap allocation takes place.
 * NOTE(review): the 'done' guard body and several loop headers are
 * elided from this excerpt.
 */
1915 static av_cold void decode_init_vlc(void){
1916     static int done = 0;
1923     chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924     chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1925     init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1926              &chroma_dc_coeff_token_len [0], 1, 1,
1927              &chroma_dc_coeff_token_bits[0], 1, 1,
1928              INIT_VLC_USE_NEW_STATIC);
     /* the four coeff_token tables are packed back to back in one array;
      * 'offset' walks through the per-table sizes declared at file scope */
1932     coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1933     coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1934     init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1935              &coeff_token_len [i][0], 1, 1,
1936              &coeff_token_bits[i][0], 1, 1,
1937              INIT_VLC_USE_NEW_STATIC);
1938     offset += coeff_token_vlc_tables_size[i];
1941      * This is a one time safety check to make sure that
1942      * the packed static coeff_token_vlc table sizes
1943      * were initialized correctly.
1945     assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1948     chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949     chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950     init_vlc(&chroma_dc_total_zeros_vlc[i],
1951              CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952              &chroma_dc_total_zeros_len [i][0], 1, 1,
1953              &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954              INIT_VLC_USE_NEW_STATIC);
1956     for(i=0; i<15; i++){
1957         total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958         total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959         init_vlc(&total_zeros_vlc[i],
1960                  TOTAL_ZEROS_VLC_BITS, 16,
1961                  &total_zeros_len [i][0], 1, 1,
1962                  &total_zeros_bits[i][0], 1, 1,
1963                  INIT_VLC_USE_NEW_STATIC);
1967     run_vlc[i].table = run_vlc_tables[i];
1968     run_vlc[i].table_allocated = run_vlc_tables_size;
1969     init_vlc(&run_vlc[i],
1971              &run_len [i][0], 1, 1,
1972              &run_bits[i][0], 1, 1,
1973              INIT_VLC_USE_NEW_STATIC);
     /* run7_vlc covers run_before for zeros_left > 6; it reuses row 6 of the run tables */
1975     run7_vlc.table = run7_vlc_table,
1976     run7_vlc.table_allocated = run7_vlc_table_size;
1977     init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1978              &run_len [6][0], 1, 1,
1979              &run_bits[6][0], 1, 1,
1980              INIT_VLC_USE_NEW_STATIC);
1982     init_cavlc_level_tab();
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread top border buffers and scratchpads.
 * av_freep() NULLs each pointer, so a later re-allocation is safe.
 */
1986 static void free_tables(H264Context *h){
1989     av_freep(&h->intra4x4_pred_mode);
1990     av_freep(&h->chroma_pred_mode_table);
1991     av_freep(&h->cbp_table);
1992     av_freep(&h->mvd_table[0]);
1993     av_freep(&h->mvd_table[1]);
1994     av_freep(&h->direct_table);
1995     av_freep(&h->non_zero_count);
1996     av_freep(&h->slice_table_base);
1997     h->slice_table= NULL; /* was an offset pointer into slice_table_base */
1999     av_freep(&h->mb2b_xy);
2000     av_freep(&h->mb2b8_xy);
     /* per-thread buffers live on the thread contexts, not on h itself */
2002     for(i = 0; i < h->s.avctx->thread_count; i++) {
2003         hx = h->thread_context[i];
2005         av_freep(&hx->top_borders[1]);
2006         av_freep(&hx->top_borders[0]);
2007         av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values,
 * combining the PPS scaling matrices with the standard dequant scales.
 * If both 8x8 scaling matrices are identical, table 1 aliases table 0.
 * Coefficients are stored transposed when the IDCT implementation
 * expects a transposed layout (non-C idct8).
 */
2011 static void init_dequant8_coeff_table(H264Context *h){
2013     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2014     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2017     for(i=0; i<2; i++ ){
2018         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019             h->dequant8_coeff[1] = h->dequant8_buffer[0]; /* share table when matrices match */
2023         for(q=0; q<52; q++){
2024             int shift = div6[q]; /* QP/6 selects the left-shift, QP%6 (elided idx) selects the scale row */
2027                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables for all 52 QP values for the
 * six scaling-matrix slots (intra/inter Y, Cb, Cr). Slots with identical
 * PPS scaling matrices share a single buffer instead of recomputing.
 * As with the 8x8 case, entries are transposed for non-C IDCTs.
 */
2034 static void init_dequant4_coeff_table(H264Context *h){
2036     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2037     for(i=0; i<6; i++ ){
2038         h->dequant4_coeff[i] = h->dequant4_buffer[i];
         /* (elided inner loop over j<i) alias an earlier table when the matrices are equal */
2040             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2048         for(q=0; q<52; q++){
2049             int shift = div6[q] + 2; /* +2: 4x4 dequant works at a finer scale than 8x8 */
2052                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2054                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS. With lossless
 * transform bypass the QP==0 entries are forced to the neutral scale
 * (1<<6) so dequantization becomes a no-op at that QP.
 */
2059 static void init_dequant_tables(H264Context *h){
2061     init_dequant4_coeff_table(h);
2062     if(h->pps.transform_8x8_mode)
2063         init_dequant8_coeff_table(h);
2064     if(h->sps.transform_bypass){
2067                 h->dequant4_coeff[i][0][x] = 1<<6;
2068         if(h->pps.transform_8x8_mode)
2071                     h->dequant8_coeff[i][0][x] = 1<<6;
2078 * needs width/height
/**
 * Allocate all per-picture-size decoder tables; requires mb_width/
 * mb_height to be known. Returns 0 on success; on allocation failure
 * CHECKED_ALLOCZ jumps to an (elided) fail path.
 * big_mb_num includes one extra mb row of padding above the picture.
 */
2080 static int alloc_tables(H264Context *h){
2081     MpegEncContext * const s = &h->s;
2082     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2085     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2087     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2088     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2089     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2091     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
     /* -1 marks "no slice"; slice_table points past the padding row so
      * out-of-picture neighbours read the -1 sentinel */
2096     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2097     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2099     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2100     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
     /* mb index -> 4x4-block / 8x8-block index lookup tables */
2101     for(y=0; y<s->mb_height; y++){
2102         for(x=0; x<s->mb_width; x++){
2103             const int mb_xy= x + y*s->mb_stride;
2104             const int b_xy = 4*x + 4*y*h->b_stride;
2105             const int b8_xy= 2*x + 2*y*h->b8_stride;
2107             h->mb2b_xy [mb_xy]= b_xy;
2108             h->mb2b8_xy[mb_xy]= b8_xy;
2112     s->obmc_scratchpad = NULL; /* allocated lazily in frame_start(), needs linesize */
2114     if(!h->dequant4_coeff[0])
2115         init_dequant_tables(h);
2124 * Mimic alloc_tables(), but for every context thread.
/**
 * Share the big per-picture tables of 'src' with thread context 'dst'
 * (shallow copies of the pointers allocated in alloc_tables()); only
 * the scratchpad stays per-thread and is allocated later.
 */
2126 static void clone_tables(H264Context *dst, H264Context *src){
2127     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2128     dst->non_zero_count           = src->non_zero_count;
2129     dst->slice_table              = src->slice_table;
2130     dst->cbp_table                = src->cbp_table;
2131     dst->mb2b_xy                  = src->mb2b_xy;
2132     dst->mb2b8_xy                 = src->mb2b8_xy;
2133     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2134     dst->mvd_table[0]             = src->mvd_table[0];
2135     dst->mvd_table[1]             = src->mvd_table[1];
2136     dst->direct_table             = src->direct_table;
2138     dst->s.obmc_scratchpad = NULL; /* per-thread; allocated in frame_start() */
2139     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2144 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate the buffers that are NOT shared between thread contexts:
 * one luma+chroma top-border row per field parity.
 * Returns 0 on success; CHECKED_ALLOCZ jumps to the fail path below.
 */
2146 static int context_init(H264Context *h){
2147     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2152     return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 and SVQ3 decoders: picture
 * dimensions, prediction functions, and flat (all-16) default scaling
 * matrices so dequant works before any PPS is parsed.
 */
2155 static av_cold void common_init(H264Context *h){
2156     MpegEncContext * const s = &h->s;
2158     s->width = s->avctx->width;
2159     s->height = s->avctx->height;
2160     s->codec_id= s->avctx->codec->id;
2162     ff_h264_pred_init(&h->hpc, s->codec_id);
2164     h->dequant_coeff_pps= -1; /* force dequant-table rebuild on first slice */
2165     s->unrestricted_mv=1;
2166     s->decode=1; //FIXME
2168     dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
     /* identity scaling matrices (value 16 == unity scale) */
2170     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback: set up the MpegEncContext defaults, choose the
 * output pixel format (SVQ3 / VDPAU / plain YUV420P) and detect
 * AVC-style ("avcC") extradata by its leading version byte 1.
 */
2174 static av_cold int decode_init(AVCodecContext *avctx){
2175     H264Context *h= avctx->priv_data;
2176     MpegEncContext * const s = &h->s;
2178     MPV_decode_defaults(s);
2183     s->out_format = FMT_H264;
2184     s->workaround_bugs= avctx->workaround_bugs;
2187 //    s->decode_mb= ff_h263_decode_mb;
2188     s->quarter_sample = 1;
2191     if(avctx->codec_id == CODEC_ID_SVQ3)
2192         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2193     else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194         avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196         avctx->pix_fmt= PIX_FMT_YUV420P;
     /* avcC extradata starts with configurationVersion == 1 */
2200     if(avctx->extradata_size > 0 && avctx->extradata &&
2201        *(char *)avctx->extradata == 1){
2208     h->thread_context[0] = h;
2209     h->outputed_poc = INT_MIN;
2210     h->prev_poc_msb= 1<<16;
/**
 * Per-frame setup: start the MPV frame and error resilience, compute
 * the block_offset tables (frame and field variants), allocate the
 * per-thread scratchpads (needs linesize, hence not in alloc_tables),
 * and reset key_frame/reference/POC bookkeeping.
 * Returns 0 on success, negative on MPV_frame_start failure (elided).
 */
2214 static int frame_start(H264Context *h){
2215     MpegEncContext * const s = &h->s;
2218     if(MPV_frame_start(s, s->avctx) < 0)
2220     ff_er_frame_start(s);
2222      * MPV_frame_start uses pict_type to derive key_frame.
2223      * This is incorrect for H.264; IDR markings must be used.
2224      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2225      * See decode_nal_units().
2227     s->current_picture_ptr->key_frame= 0;
2229     assert(s->linesize && s->uvlinesize);
     /* offsets 0..23: frame-coded layout; 24..47: field-coded (doubled stride) */
2231     for(i=0; i<16; i++){
2232         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2233         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2236         h->block_offset[16+i]=
2237         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2238         h->block_offset[24+16+i]=
2239         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2242     /* can't be in alloc_tables because linesize isn't known there.
2243      * FIXME: redo bipred weight to not require extra buffer? */
2244     for(i = 0; i < s->avctx->thread_count; i++)
2245         if(!h->thread_context[i]->s.obmc_scratchpad)
2246             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2248     /* some macroblocks will be accessed before they're available */
2249     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2250         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2252 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2254     // We mark the current picture as non-reference after allocating it, so
2255     // that if we break out due to an error it can be released automatically
2256     // in the next MPV_frame_start().
2257     // SVQ3 as well as most other codecs have only last/next/current and thus
2258     // get released even with set reference, besides SVQ3 and others do not
2259     // mark frames as reference later "naturally".
2260     if(s->codec_id != CODEC_ID_SVQ3)
2261         s->current_picture_ptr->reference= 0;
     /* POCs are filled in later per field; INT_MAX marks them as unset */
2263     s->current_picture_ptr->field_poc[0]=
2264     s->current_picture_ptr->field_poc[1]= INT_MAX;
2265     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the right and bottom edges of the just-decoded macroblock into
 * h->left_border / h->top_borders so the deblocking filter of the
 * neighbouring MBs can still read the unfiltered samples.
 * The MBAFF path (!simple && FRAME_MBAFF) keeps two border lines per MB
 * pair and interleaves left-border samples with 'step'.
 */
2270 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2271     MpegEncContext * const s = &h->s;
2280     src_cb -= uvlinesize;
2281     src_cr -= uvlinesize;
2283     if(!simple && FRAME_MBAFF){
2285         offset  = MB_MBAFF ? 1 : 17;
2286         uvoffset= MB_MBAFF ? 1 : 9;
         /* bottom row of the MB (luma line 15, chroma line 7) becomes the
          * neighbour's "top" border */
2288         *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2289         *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2290         if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2291             *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2292             *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2297         h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298         if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2299             h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7  ];
2300             h->left_border[34+18  ]= h->top_borders[0][s->mb_x][16+8+7];
2306         top_idx = MB_MBAFF ? 0 : 1;
2308     step= MB_MBAFF ? 2 : 1;
2311     // There are two lines saved, the line above the top macroblock of a pair,
2312     // and the line above the bottom macroblock
2313     h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2314     for(i=1; i<17 - skiplast; i++){
2315         h->left_border[offset+i*step]= src_y[15+i*  linesize];
2318     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2319     *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2321     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322         h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2323         h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2324         for(i=1; i<9 - skiplast; i++){
2325             h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2326             h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2328         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2329         *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved unfiltered border samples
 * with the current MB edges. Intra prediction must see unfiltered
 * neighbours, so the borders are exchanged in before prediction and
 * swapped back afterwards. deblock_left/top are computed from slice
 * boundaries when deblocking_filter==2 (filter within slice only).
 */
2333 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2334     MpegEncContext * const s = &h->s;
2345     if(!simple && FRAME_MBAFF){
2347         offset  = MB_MBAFF ? 1 : 17;
2348         uvoffset= MB_MBAFF ? 1 : 9;
2352         top_idx = MB_MBAFF ? 0 : 1;
2354     step= MB_MBAFF ? 2 : 1;
2357     if(h->deblocking_filter == 2) {
2359         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2360         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2362         deblock_left = (s->mb_x > 0);
2363         deblock_top =  (s->mb_y > !!MB_FIELD);
     /* step back one row/column so the border samples are addressable */
2366     src_y  -=   linesize + 1;
2367     src_cb -= uvlinesize + 1;
2368     src_cr -= uvlinesize + 1;
2370 #define XCHG(a,b,t,xchg)\
2377         for(i = !deblock_top; i<16; i++){
2378             XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2380         XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2384         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2385         XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2386         if(s->mb_x+1 < s->mb_width){
2387             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2391     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2393             for(i = !deblock_top; i<8; i++){
2394                 XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2395                 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2397             XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2398             XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2401             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2402             XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Decode (reconstruct) one macroblock: intra prediction or motion
 * compensation, inverse transform + residual add for luma and chroma,
 * then border backup and deblocking.
 * @param simple non-zero selects the fast path that assumes no MBAFF,
 *               no PCM, no gray-only decoding and H.264 (not SVQ3);
 *               the compiler specializes both variants via always_inline.
 */
2407 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2408     MpegEncContext * const s = &h->s;
2409     const int mb_x= s->mb_x;
2410     const int mb_y= s->mb_y;
2411     const int mb_xy= h->mb_xy;
2412     const int mb_type= s->current_picture.mb_type[mb_xy];
2413     uint8_t  *dest_y, *dest_cb, *dest_cr;
2414     int linesize, uvlinesize /*dct_offset*/;
2416     int *block_offset = &h->block_offset[0];
2417     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2418     /* is_h264 should always be true if SVQ3 is disabled. */
2419     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2420     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2421     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2423     dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2424     dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2425     dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2427     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2428     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
     /* field macroblock: double strides, use field block_offset table,
      * and for the bottom field step back to the field's first line */
2430     if (!simple && MB_FIELD) {
2431         linesize   = h->mb_linesize = s->linesize * 2;
2432         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2433         block_offset = &h->block_offset[24];
2434         if(mb_y&1){ //FIXME move out of this function?
2435             dest_y -= s->linesize*15;
2436             dest_cb-= s->uvlinesize*7;
2437             dest_cr-= s->uvlinesize*7;
2441         for(list=0; list<h->list_count; list++){
2442             if(!USES_LIST(mb_type, list))
2444             if(IS_16X16(mb_type)){
2445                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2446                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2448                 for(i=0; i<16; i+=4){
2449                     int ref = h->ref_cache[list][scan8[i]];
2451                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2457         linesize   = h->mb_linesize = s->linesize;
2458         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2459 //        dct_offset = s->linesize * 16;
     /* IPCM macroblock: raw samples were stored in h->mb, just copy */
2462     if (!simple && IS_INTRA_PCM(mb_type)) {
2463         for (i=0; i<16; i++) {
2464             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2466         for (i=0; i<8; i++) {
2467             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2468             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2471         if(IS_INTRA(mb_type)){
2472             if(h->deblocking_filter)
2473                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2475             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2476                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2477                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2480             if(IS_INTRA4x4(mb_type)){
2481                 if(simple || !s->encoding){
2482                     if(IS_8x8DCT(mb_type)){
2483                         if(transform_bypass){
2485                             idct_add = s->dsp.add_pixels8;
2487                             idct_dc_add = s->dsp.h264_idct8_dc_add;
2488                             idct_add    = s->dsp.h264_idct8_add;
2490                         for(i=0; i<16; i+=4){
2491                             uint8_t * const ptr= dest_y + block_offset[i];
2492                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                         /* profile 244 (Hi444) lossless: special V/H prediction-with-add */
2493                             if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2494                                 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2496                                 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2497                                 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2498                                                             (h->topright_samples_available<<i)&0x4000, linesize);
2500                                     if(nnz == 1 && h->mb[i*16])
2501                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2503                                         idct_add   (ptr, h->mb + i*16, linesize);
2508                     if(transform_bypass){
2510                         idct_add = s->dsp.add_pixels4;
2512                         idct_dc_add = s->dsp.h264_idct_dc_add;
2513                         idct_add    = s->dsp.h264_idct_add;
2515                     for(i=0; i<16; i++){
2516                         uint8_t * const ptr= dest_y + block_offset[i];
2517                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2519                         if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2520                             h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
                         /* diagonal modes need top-right samples; replicate the last
                          * top sample when the real top-right block is unavailable */
2524                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2525                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2526                                 assert(mb_y || linesize <= block_offset[i]);
2527                                 if(!topright_avail){
2528                                     tr= ptr[3 - linesize]*0x01010101;
2529                                     topright= (uint8_t*) &tr;
2531                                     topright= ptr + 4 - linesize;
2535                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2536                             nnz = h->non_zero_count_cache[ scan8[i] ];
2539                                     if(nnz == 1 && h->mb[i*16])
2540                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2542                                         idct_add   (ptr, h->mb + i*16, linesize);
2544                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
             /* intra 16x16: predict the whole MB, then the DC transform */
2551                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2553                     if(!transform_bypass)
2554                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2556                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2558             if(h->deblocking_filter)
2559                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
         /* inter macroblock: motion compensation */
2561             hl_motion(h, dest_y, dest_cb, dest_cr,
2562                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2563                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2564                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
     /* add the luma residual */
2568         if(!IS_INTRA4x4(mb_type)){
2570             if(IS_INTRA16x16(mb_type)){
2571                 if(transform_bypass){
2572                     if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2573                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2575                         for(i=0; i<16; i++){
2576                             if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2577                                 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2581                     s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2583             }else if(h->cbp&15){
2584                 if(transform_bypass){
2585                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2586                     idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2587                     for(i=0; i<16; i+=di){
2588                         if(h->non_zero_count_cache[ scan8[i] ]){
2589                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2593                     if(IS_8x8DCT(mb_type)){
2594                         s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2596                         s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
             /* SVQ3 luma residual path */
2601                 for(i=0; i<16; i++){
2602                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2603                         uint8_t * const ptr= dest_y + block_offset[i];
2604                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
     /* chroma residual (cbp bits 4-5) */
2610         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2611             uint8_t *dest[2] = {dest_cb, dest_cr};
2612             if(transform_bypass){
2613                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2614                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2615                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2617                     idct_add = s->dsp.add_pixels4;
2618                     for(i=16; i<16+8; i++){
2619                         if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2620                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2624                     chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2625                     chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2627                     idct_add = s->dsp.h264_idct_add;
2628                     idct_dc_add = s->dsp.h264_idct_dc_add;
2629                     for(i=16; i<16+8; i++){
2630                         if(h->non_zero_count_cache[ scan8[i] ])
2631                             idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2632                         else if(h->mb[i*16])
2633                             idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2636                 for(i=16; i<16+8; i++){
2637                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2638                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2639                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2646     if(h->cbp || IS_INTRA(mb_type))
2647         s->dsp.clear_blocks(h->mb);
2649     if(h->deblocking_filter) {
2650         backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2651         fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2652         h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2653         h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2654         if (!simple && FRAME_MBAFF) {
2655             filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2657             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2663 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/**
 * Fast-path macroblock reconstruction (simple=1 specialization of
 * hl_decode_mb_internal; avoids MBAFF/PCM/gray checks).
 */
2665 static void hl_decode_mb_simple(H264Context *h){
2666     hl_decode_mb_internal(h, 1);
2670 * Process a macroblock; this handles edge cases, such as interlacing.
/**
 * Full macroblock reconstruction handling all edge cases (MBAFF,
 * IPCM, SVQ3, gray); av_noinline keeps the simple path's code small.
 */
2672 static void av_noinline hl_decode_mb_complex(H264Context *h){
2673     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * depending on MB type and context (always complex for CONFIG_SMALL).
 */
2676 static void hl_decode_mb(H264Context *h){
2677     MpegEncContext * const s = &h->s;
2678     const int mb_xy= h->mb_xy;
2679     const int mb_type= s->current_picture.mb_type[mb_xy];
2680     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2682     if(CONFIG_H264_ENCODER && !s->decode) /* encoder may skip reconstruction */
2686         hl_decode_mb_complex(h);
2687     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view in place: double the
 * line strides, offset the data pointers for the bottom field, and set
 * reference/poc to the requested parity's values.
 * (pic->reference is redundantly re-assigned each loop iteration.)
 */
2690 static void pic_as_field(Picture *pic, const int parity){
2692     for (i = 0; i < 4; ++i) {
2693         if (parity == PICT_BOTTOM_FIELD)
2694             pic->data[i] += pic->linesize[i];
2695         pic->reference = parity;
2696         pic->linesize[i] *= 2;
2698     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy 'src' into 'dest' if its reference flags match 'parity',
 * converting to a field picture when parity is a single field and
 * bumping pic_id by id_add. Returns non-zero iff a copy was made.
 * NOTE(review): the copy and return statements are elided from this
 * excerpt; only the match test and field conversion are visible.
 */
2701 static int split_field_copy(Picture *dest, Picture *src,
2702                             int parity, int id_add){
2703     int match = !!(src->reference & parity);
2707         if(parity != PICT_FRAME){
2708             pic_as_field(dest, parity);
2710             dest->pic_id += id_add;
/**
 * Build a default reference list from 'in', alternating between fields
 * matching 'sel' and the opposite parity (sel^3), as H.264 requires for
 * field reference lists. pic_id becomes the long-term index (is_long)
 * or frame_num. Returns the number of entries written (return elided).
 */
2717 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2721     while(i[0]<len || i[1]<len){
         /* advance each cursor to the next picture referencing its parity */
2722         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2724         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2727             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2728             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2731             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2732             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append to 'sorted' the pictures from 'src' whose POC lies beyond
 * 'limit' in direction 'dir' (dir=0: descending POCs below limit;
 * dir=1: ascending POCs above limit) — a repeated-selection sort used
 * to order B-frame reference lists by POC distance.
 * Returns the number of pictures appended (return elided).
 */
2739 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2744         best_poc= dir ? INT_MIN : INT_MAX;
2746         for(i=0; i<len; i++){
2747             const int poc= src[i]->poc;
2748             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2750                 sorted[out_i]= src[i];
2753         if(best_poc == (dir ? INT_MIN : INT_MAX)) /* nothing left in this direction */
2755         limit= sorted[out_i++]->poc - dir;
2761 * fills the default_ref_list.
/**
 * Build h->default_ref_list per H.264 spec 8.2.4.2: for B slices, short
 * term refs sorted by POC around the current POC (list1 mirrored), long
 * term refs appended; for P slices, short term by frame_num order then
 * long term. Always returns 0 (return elided from this excerpt).
 */
2763 static int fill_default_ref_list(H264Context *h){
2764     MpegEncContext * const s = &h->s;
2767     if(h->slice_type_nos==FF_B_TYPE){
2768         Picture *sorted[32];
2773             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2775             cur_poc= s->current_picture_ptr->poc;
2777         for(list= 0; list<2; list++){
             /* list0: past refs first then future; list1: the opposite */
2778             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2779             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2781             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2782             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2785             if(len < h->ref_count[list])
2786                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
         /* spec: if both lists are identical and long enough, swap the
          * first two entries of list1 */
2790         if(lens[0] == lens[1] && lens[1] > 1){
2791             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2793                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2796         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2797         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2799         if(len < h->ref_count[0])
2800             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2803     for (i=0; i<h->ref_count[0]; i++) {
2804         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2806     if(h->slice_type_nos==FF_B_TYPE){
2807         for (i=0; i<h->ref_count[1]; i++) {
2808             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2815 static void print_short_term(H264Context *h);
2816 static void print_long_term(H264Context *h);
2819 * Extract structure information about the picture described by pic_num in
2820 * the current decoding context (frame or field). Note that pic_num is
2821 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2822 * @param pic_num picture number for which to extract structure information
2823 * @param structure one of PICT_XXX describing structure of picture
2825 * @return frame number (short term) or long term index of picture
2826 * described by pic_num
2828 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2829     MpegEncContext * const s = &h->s;
2831     *structure = s->picture_structure;
     /* (elided condition: field decoding and even pic_num) */
2834         /* opposite field */
2835         *structure ^= PICT_FRAME;
     /* NOTE(review): the return of the frame number / long-term index is
      * elided from this excerpt. */
/**
 * Parse and apply ref_pic_list_reordering (H.264 spec 8.2.4.3): starting
 * from the default lists, move the signalled short/long-term pictures to
 * the front positions, then sanity-check that every list entry has data.
 * Returns 0 on success, -1 on bitstream errors (some returns elided).
 */
2842 static int decode_ref_pic_list_reordering(H264Context *h){
2843     MpegEncContext * const s = &h->s;
2844     int list, index, pic_structure;
2846     print_short_term(h);
2849     for(list=0; list<h->list_count; list++){
2850         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2852         if(get_bits1(&s->gb)){ /* ref_pic_list_reordering_flag */
2853             int pred= h->curr_pic_num;
2855             for(index=0; ; index++){
2856                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2857                 unsigned int pic_id;
2859                 Picture *ref = NULL;
2861                 if(reordering_of_pic_nums_idc==3) /* end of reordering commands */
2864                 if(index >= h->ref_count[list]){
2865                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2869                 if(reordering_of_pic_nums_idc<3){
2870                     if(reordering_of_pic_nums_idc<2){ /* 0/1: short-term, signed pic_num delta */
2871                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2874                         if(abs_diff_pic_num > h->max_pic_num){
2875                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2879                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2880                         else                                pred+= abs_diff_pic_num;
2881                         pred &= h->max_pic_num - 1;
2883                         frame_num = pic_num_extract(h, pred, &pic_structure);
                     /* search short-term list newest-first for the frame_num */
2885                         for(i= h->short_ref_count-1; i>=0; i--){
2886                             ref = h->short_ref[i];
2887                             assert(ref->reference);
2888                             assert(!ref->long_ref);
2890                                 ref->frame_num == frame_num &&
2891                                 (ref->reference & pic_structure)
                     /* idc==2: long-term reference by index */
2899                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2901                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2904                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2907                         ref = h->long_ref[long_idx];
2908                         assert(!(ref && !ref->reference));
2909                         if(ref && (ref->reference & pic_structure)){
2910                             ref->pic_id= pic_id;
2911                             assert(ref->long_ref);
2919                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2920                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                     /* shift list down and insert the found ref at 'index' */
2922                         for(i=index; i+1<h->ref_count[list]; i++){
2923                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2926                         for(; i > index; i--){
2927                             h->ref_list[list][i]= h->ref_list[list][i-1];
2929                         h->ref_list[list][index]= *ref;
2931                             pic_as_field(&h->ref_list[list][index], pic_structure);
2935                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2941     for(list=0; list<h->list_count; list++){
2942         for(index= 0; index < h->ref_count[list]; index++){
2943             if(!h->ref_list[list][index].data[0]){
2944                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2945                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/**
 * For MBAFF frames, duplicate every frame reference into a pair of
 * field references at ref_list[list][16+2*i] (top) and [16+2*i+1]
 * (bottom), and replicate the corresponding explicit and implicit
 * prediction weights for the field indices.
 */
2953 static void fill_mbaff_ref_list(H264Context *h){
2955     for(list=0; list<2; list++){ //FIXME try list_count
2956         for(i=0; i<h->ref_count[list]; i++){
2957             Picture *frame = &h->ref_list[list][i];
2958             Picture *field = &h->ref_list[list][16+2*i];
2961                 field[0].linesize[j] <<= 1; /* field view: double stride */
2962             field[0].reference = PICT_TOP_FIELD;
2963             field[0].poc= field[0].field_poc[0];
2964             field[1] = field[0];
2966                 field[1].data[j] += frame->linesize[j]; /* bottom field starts one line down */
2967             field[1].reference = PICT_BOTTOM_FIELD;
2968             field[1].poc= field[1].field_poc[1];
2970             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2971             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2973                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2974                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2978     for(j=0; j<h->ref_count[1]; j++){
2979         for(i=0; i<h->ref_count[0]; i++)
2980             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2981         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
2982         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse pred_weight_table from the slice header (explicit weighted
 * prediction, spec 7.3.3.2): log2 denominators, then per-reference
 * luma/chroma weight+offset pairs; absent entries get the default
 * (1<<denominator, offset 0). Sets h->use_weight[_chroma].
 */
2986 static int pred_weight_table(H264Context *h){
2987     MpegEncContext * const s = &h->s;
2989     int luma_def, chroma_def;
2992     h->use_weight_chroma= 0;
2993     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2994     h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2995     luma_def = 1<<h->luma_log2_weight_denom;
2996     chroma_def = 1<<h->chroma_log2_weight_denom;
2998     for(list=0; list<2; list++){
2999         for(i=0; i<h->ref_count[list]; i++){
3000             int luma_weight_flag, chroma_weight_flag;
3002             luma_weight_flag= get_bits1(&s->gb);
3003             if(luma_weight_flag){
3004                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3005                 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3006                 if(   h->luma_weight[list][i] != luma_def
3007                    || h->luma_offset[list][i] != 0)
3010                 h->luma_weight[list][i]= luma_def;
3011                 h->luma_offset[list][i]= 0;
3015                 chroma_weight_flag= get_bits1(&s->gb);
3016                 if(chroma_weight_flag){
3019                         h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3020                         h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3021                         if(   h->chroma_weight[list][i][j] != chroma_def
3022                            || h->chroma_offset[list][i][j] != 0)
3023                             h->use_weight_chroma= 1;
3028                         h->chroma_weight[list][i][j]= chroma_def;
3029                         h->chroma_offset[list][i][j]= 0;
3034         if(h->slice_type_nos != FF_B_TYPE) break; /* only B slices signal list1 weights */
3036     h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initializes the implicit weight table for B slices
 * (weighted_bipred_idc == 2, H.264 spec 8.4.2.3.2).
 * Weights are derived from the POC distances between the current picture and
 * each (ref0, ref1) pair; out-of-range scale factors fall back to 32/32.
 */
3040 static void implicit_weight_table(H264Context *h){
3041 MpegEncContext * const s = &h->s;
3043 int cur_poc = s->current_picture_ptr->poc;
/* special case: a single ref in each list whose POCs straddle the current
 * picture symmetrically needs no weighting at all */
3045 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3046 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3048 h->use_weight_chroma= 0;
/* use_weight==2 marks "implicit" mode for the motion compensation code */
3053 h->use_weight_chroma= 2;
3054 h->luma_log2_weight_denom= 5;
3055 h->chroma_log2_weight_denom= 5;
3057 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3058 int poc0 = h->ref_list[0][ref0].poc;
3059 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3060 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation;
 * POC deltas are clipped to [-128,127] per 8.4.2.3.1 */
3061 int td = av_clip(poc1 - poc0, -128, 127);
3063 int tb = av_clip(cur_poc - poc0, -128, 127);
3064 int tx = (16384 + (FFABS(td) >> 1)) / td;
3065 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3066 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3067 h->implicit_weight[ref0][ref1] = 32;
3069 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3071 h->implicit_weight[ref0][ref1] = 32;
3077 * Mark a picture as no longer needed for reference. The refmask
3078 * argument allows unreferencing of individual fields or the whole frame.
3079 * If the picture becomes entirely unreferenced, but is being held for
3080 * display purposes, it is marked as such.
3081 * @param refmask mask of fields to unreference; the mask is bitwise
3082 * anded with the reference marking of pic
3083 * @return non-zero if pic becomes entirely unreferenced (except possibly
3084 * for display purposes) zero if one of the fields remains in
3087 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* some field of pic is still referenced -> keep it in the ref lists */
3089 if (pic->reference &= refmask) {
/* fully unreferenced: if it is queued for output, tag it so the picture
 * buffer is not recycled before display */
3092 for(i = 0; h->delayed_pic[i]; i++)
3093 if(pic == h->delayed_pic[i]){
3094 pic->reference=DELAYED_PIC_REF;
3102 * instantaneous decoder refresh. Empties the whole DPB reference state:
 * all long-term and short-term references are dropped and the frame_num
 * prediction state is reset, as required at an IDR slice.
3104 static void idr(H264Context *h){
/* drop every long-term reference slot (0..15) */
3107 for(i=0; i<16; i++){
3108 remove_long(h, i, 0);
3110 assert(h->long_ref_count==0);
/* drop every short-term reference */
3112 for(i=0; i<h->short_ref_count; i++){
3113 unreference_pic(h, h->short_ref[i], 0);
3114 h->short_ref[i]= NULL;
3116 h->short_ref_count=0;
/* reset frame_num tracking so gap detection restarts cleanly */
3117 h->prev_frame_num= 0;
3118 h->prev_frame_num_offset= 0;
3123 /* forget old pics after a seek */
3124 static void flush_dpb(AVCodecContext *avctx){
3125 H264Context *h= avctx->priv_data;
/* release every delayed-output picture and clear the output POC tracker */
3127 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3128 if(h->delayed_pic[i])
3129 h->delayed_pic[i]->reference= 0;
3130 h->delayed_pic[i]= NULL;
3132 h->outputed_poc= INT_MIN;
/* also unreference the in-progress picture and reset field pairing */
3134 if(h->s.current_picture_ptr)
3135 h->s.current_picture_ptr->reference= 0;
3136 h->s.first_field= 0;
/* delegate the generic MPEG-side buffer flush */
3137 ff_mpeg_flush(avctx);
3141 * Find a Picture in the short term reference list by frame number.
3142 * @param frame_num frame number to search for
3143 * @param idx the index into h->short_ref where returned picture is found
3144 * undefined if no picture found.
3145 * @return pointer to the found picture, or NULL if no pic with the provided
3146 * frame number is found
3148 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3149 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is small (<=16) so this is fine */
3152 for(i=0; i<h->short_ref_count; i++){
3153 Picture *pic= h->short_ref[i];
3154 if(s->avctx->debug&FF_DEBUG_MMCO)
3155 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3156 if(pic->frame_num == frame_num) {
3165 * Remove a picture from the short term reference list by its index in
3166 * that list. This does no checking on the provided index; it is assumed
3167 * to be valid. Other list entries are shifted down.
3168 * @param i index into h->short_ref of picture to remove.
3170 static void remove_short_at_index(H264Context *h, int i){
3171 assert(i >= 0 && i < h->short_ref_count);
3172 h->short_ref[i]= NULL;
/* close the gap: shift the remaining pointers down one slot */
3173 if (--h->short_ref_count)
3174 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Remove (or partially unreference, per ref_mask) a short-term reference
 * picture identified by frame_num. */
3179 * @return the removed picture or NULL if an error occurs
3181 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3182 MpegEncContext * const s = &h->s;
3186 if(s->avctx->debug&FF_DEBUG_MMCO)
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3189 pic = find_short(h, frame_num, &i);
/* only drop it from the list if no field of it remains referenced */
3191 if(unreference_pic(h, pic, ref_mask))
3192 remove_short_at_index(h, i);
3199 * Remove a picture from the long term reference list by its index in
/* that list (or only unreference the fields given by ref_mask). */
3201 * @return the removed picture or NULL if an error occurs
3203 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3206 pic= h->long_ref[i];
/* only vacate the slot once the picture is entirely unreferenced */
3208 if(unreference_pic(h, pic, ref_mask)){
3209 assert(h->long_ref[i]->long_ref == 1);
3210 h->long_ref[i]->long_ref= 0;
3211 h->long_ref[i]= NULL;
3212 h->long_ref_count--;
3220 * print short term list
/* Debug helper: dumps the short-term reference list when FF_DEBUG_MMCO is on. */
3222 static void print_short_term(H264Context *h) {
3224 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3225 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3226 for(i=0; i<h->short_ref_count; i++){
3227 Picture *pic= h->short_ref[i];
3228 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3234 * print long term list
/* Debug helper: dumps all 16 long-term slots when FF_DEBUG_MMCO is on;
 * the NULL-slot skip is presumably on an elided line. */
3236 static void print_long_term(H264Context *h) {
3238 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3239 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3240 for(i = 0; i < 16; i++){
3241 Picture *pic= h->long_ref[i];
3243 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3250 * Executes the reference picture marking (memory management control operations).
/* Applies the decoded MMCO list (H.264 spec 8.2.5) to the short/long-term
 * reference lists, handles implicit sliding-window marking of the current
 * picture, and guards against DPB overflow from corrupt streams. */
3252 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3253 MpegEncContext * const s = &h->s;
3255 int current_ref_assigned=0;
3258 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3261 for(i=0; i<mmco_count; i++){
3262 int structure, frame_num;
3263 if(s->avctx->debug&FF_DEBUG_MMCO)
3264 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* for short-term ops, resolve pic_num to a frame_num/field structure and
 * locate the picture up-front; errors are tolerated with a log message */
3266 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3267 || mmco[i].opcode == MMCO_SHORT2LONG){
3268 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3269 pic = find_short(h, frame_num, &j);
3271 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3272 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3273 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3278 switch(mmco[i].opcode){
3279 case MMCO_SHORT2UNUSED:
3280 if(s->avctx->debug&FF_DEBUG_MMCO)
3281 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
/* structure ^ PICT_FRAME inverts the mask: unreference only the
 * addressed field(s), keep the complementary one */
3282 remove_short(h, frame_num, structure ^ PICT_FRAME);
3284 case MMCO_SHORT2LONG:
/* move a short-term picture into long-term slot long_arg,
 * evicting any different occupant first */
3285 if (h->long_ref[mmco[i].long_arg] != pic)
3286 remove_long(h, mmco[i].long_arg, 0);
3288 remove_short_at_index(h, j);
3289 h->long_ref[ mmco[i].long_arg ]= pic;
3290 if (h->long_ref[ mmco[i].long_arg ]){
3291 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3292 h->long_ref_count++;
3295 case MMCO_LONG2UNUSED:
3296 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3297 pic = h->long_ref[j];
3299 remove_long(h, j, structure ^ PICT_FRAME);
3300 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3301 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO_LONG: mark the *current* picture long-term (case label is on an
 * elided line here, presumably) */
3304 // Comment below left from previous code as it is an interresting note.
3305 /* First field in pair is in short term list or
3306 * at a different long term index.
3307 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3308 * Report the problem and keep the pair where it is,
3309 * and mark this field valid.
3312 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3313 remove_long(h, mmco[i].long_arg, 0);
3315 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3316 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3317 h->long_ref_count++;
3320 s->current_picture_ptr->reference |= s->picture_structure;
3321 current_ref_assigned=1;
3323 case MMCO_SET_MAX_LONG:
3324 assert(mmco[i].long_arg <= 16);
3325 // just remove the long term which index is greater than new max
3326 for(j = mmco[i].long_arg; j<16; j++){
3327 remove_long(h, j, 0);
/* MMCO_RESET (case label elided): behave like an IDR — drop everything
 * and zero the current picture's POC/frame_num */
3331 while(h->short_ref_count){
3332 remove_short(h, h->short_ref[0]->frame_num, 0);
3334 for(j = 0; j < 16; j++) {
3335 remove_long(h, j, 0);
3337 s->current_picture_ptr->poc=
3338 s->current_picture_ptr->field_poc[0]=
3339 s->current_picture_ptr->field_poc[1]=
3343 s->current_picture_ptr->frame_num= 0;
/* implicit marking path: no MMCO assigned the current picture yet */
3349 if (!current_ref_assigned) {
3350 /* Second field of complementary field pair; the first field of
3351 * which is already referenced. If short referenced, it
3352 * should be first entry in short_ref. If not, it must exist
3353 * in long_ref; trying to put it on the short list here is an
3354 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3356 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3357 /* Just mark the second field valid */
3358 s->current_picture_ptr->reference = PICT_FRAME;
3359 } else if (s->current_picture_ptr->long_ref) {
3360 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3361 "assignment for second field "
3362 "in complementary field pair "
3363 "(first field is long term)\n");
/* normal case: push the current picture onto the head of short_ref;
 * a stale entry with the same frame_num must not survive */
3365 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3367 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3370 if(h->short_ref_count)
3371 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3373 h->short_ref[0]= s->current_picture_ptr;
3374 h->short_ref_count++;
3375 s->current_picture_ptr->reference |= s->picture_structure;
/* DPB overflow guard for corrupt streams */
3379 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3381 /* We have too many reference frames, probably due to corrupted
3382 * stream. Need to discard one frame. Prevents overrun of the
3383 * short_ref and long_ref buffers.
3385 av_log(h->s.avctx, AV_LOG_ERROR,
3386 "number of reference frames exceeds max (probably "
3387 "corrupt input), discarding one\n");
3389 if (h->long_ref_count && !h->short_ref_count) {
3390 for (i = 0; i < 16; ++i)
3395 remove_long(h, i, 0);
/* otherwise evict the oldest short-term reference */
3397 pic = h->short_ref[h->short_ref_count - 1];
3398 remove_short(h, pic->frame_num, 0);
3402 print_short_term(h);
/**
 * Parses dec_ref_pic_marking() (H.264 spec 7.3.3.3) from the slice header:
 * IDR handling, the adaptive MMCO list, or the implicit sliding-window
 * fallback (synthesizing SHORT2UNUSED ops when the DPB is full).
 */
3407 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3408 MpegEncContext * const s = &h->s;
3412 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; -1 maps flag to broken_link semantics */
3413 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag handling: mark IDR pic long-term at index 0
 * (the guarding get_bits1() is presumably on an elided line) */
3415 h->mmco[0].opcode= MMCO_LONG;
3416 h->mmco[0].long_arg= 0;
3420 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3421 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3422 MMCOOpcode opcode= get_ue_golomb_31(gb);
3424 h->mmco[i].opcode= opcode;
3425 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic num, wrapped */
3426 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3427 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3428 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3432 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3433 unsigned int long_arg= get_ue_golomb_31(gb);
/* in field pictures LONG2UNUSED may address individual fields,
 * doubling the valid index range */
3434 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3435 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3438 h->mmco[i].long_arg= long_arg;
3441 if(opcode > (unsigned)MMCO_LONG){
3442 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3445 if(opcode == MMCO_END)
/* sliding-window mode: if adding the current pic would overflow the DPB,
 * synthesize an unref of the oldest short-term picture */
3450 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3452 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3453 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3454 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3455 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* in field mode each frame contributes two field pic_nums */
3457 if (FIELD_PICTURE) {
3458 h->mmco[0].short_pic_num *= 2;
3459 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3460 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Derives the picture order count for the current picture
 * (H.264 spec 8.2.1, all three poc_type variants) and stores the
 * per-field POCs plus their minimum into the current Picture.
 */
3470 static int init_poc(H264Context *h){
3471 MpegEncContext * const s = &h->s;
3472 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3474 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> advance the running offset by one wrap period */
3476 h->frame_num_offset= h->prev_frame_num_offset;
3477 if(h->frame_num < h->prev_frame_num)
3478 h->frame_num_offset += max_frame_num;
3480 if(h->sps.poc_type==0){
/* type 0: explicit poc_lsb in the bitstream; reconstruct the MSB by
 * detecting lsb wrap-around in either direction (spec 8.2.1.1) */
3481 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3483 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3484 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3485 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3486 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3488 h->poc_msb = h->prev_poc_msb;
3489 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3491 field_poc[1] = h->poc_msb + h->poc_lsb;
3492 if(s->picture_structure == PICT_FRAME)
3493 field_poc[1] += h->delta_poc_bottom;
3494 }else if(h->sps.poc_type==1){
/* type 1: POC derived from frame_num and the SPS reference-offset cycle */
3495 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3498 if(h->sps.poc_cycle_length != 0)
3499 abs_frame_num = h->frame_num_offset + h->frame_num;
3503 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3506 expected_delta_per_poc_cycle = 0;
3507 for(i=0; i < h->sps.poc_cycle_length; i++)
3508 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3510 if(abs_frame_num > 0){
3511 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3512 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3514 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3515 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3516 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3520 if(h->nal_ref_idc == 0)
3521 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3523 field_poc[0] = expectedpoc + h->delta_poc[0];
3524 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3526 if(s->picture_structure == PICT_FRAME)
3527 field_poc[1] += h->delta_poc[1];
/* type 2 (else branch): POC follows decoding order directly */
3529 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC for the field(s) actually present in this picture */
3538 if(s->picture_structure != PICT_BOTTOM_FIELD)
3539 s->current_picture_ptr->field_poc[0]= field_poc[0];
3540 if(s->picture_structure != PICT_TOP_FIELD)
3541 s->current_picture_ptr->field_poc[1]= field_poc[1];
3542 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3549 * initialize scan tables
/* Copies (or permutes) the zigzag/field scan tables into the context.
 * When the platform IDCT uses a non-canonical coefficient layout, the scan
 * order must be permuted with T() to match it. */
3551 static void init_scan_tables(H264Context *h){
3552 MpegEncContext * const s = &h->s;
/* 4x4 tables: plain C IDCT keeps the canonical order, otherwise permute */
3554 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3555 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3556 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3558 for(i=0; i<16; i++){
3559 #define T(x) (x>>2) | ((x<<2) & 0xF)
3560 h->zigzag_scan[i] = T(zigzag_scan[i]);
3561 h-> field_scan[i] = T( field_scan[i]);
/* 8x8 tables: same scheme with an 8x8 transpose-style permutation */
3565 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3566 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3567 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3568 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3569 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3571 for(i=0; i<64; i++){
3572 #define T(x) (x>>3) | ((x&7)<<3)
3573 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3574 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3575 h->field_scan8x8[i] = T(field_scan8x8[i]);
3576 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* qp==0 (lossless transform-bypass) blocks always use the canonical,
 * unpermuted tables; otherwise alias the _q0 pointers to the above */
3580 if(h->sps.transform_bypass){ //FIXME same ugly
3581 h->zigzag_scan_q0 = zigzag_scan;
3582 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3583 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3584 h->field_scan_q0 = field_scan;
3585 h->field_scan8x8_q0 = field_scan8x8;
3586 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3588 h->zigzag_scan_q0 = h->zigzag_scan;
3589 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3590 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3591 h->field_scan_q0 = h->field_scan;
3592 h->field_scan8x8_q0 = h->field_scan8x8;
3593 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3598 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-frame state a slice-thread worker needs:
 * current picture pointers, POC/frame_num prediction state, reference
 * lists, and the dequant tables. */
3600 static void clone_slice(H264Context *dst, H264Context *src)
3602 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3603 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3604 dst->s.current_picture = src->s.current_picture;
3605 dst->s.linesize = src->s.linesize;
3606 dst->s.uvlinesize = src->s.uvlinesize;
3607 dst->s.first_field = src->s.first_field;
3609 dst->prev_poc_msb = src->prev_poc_msb;
3610 dst->prev_poc_lsb = src->prev_poc_lsb;
3611 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3612 dst->prev_frame_num = src->prev_frame_num;
3613 dst->short_ref_count = src->short_ref_count;
/* reference lists are arrays of Picture (structs/pointers), copied wholesale */
3615 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3616 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3617 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3618 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3620 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3621 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3625 * decodes a slice header.
3626 * This will also call MPV_common_init() and frame_start() as needed.
3628 * @param h h264context
3629 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3631 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3633 static int decode_slice_header(H264Context *h, H264Context *h0){
3634 MpegEncContext * const s = &h->s;
3635 MpegEncContext * const s0 = &h0->s;
3636 unsigned int first_mb_in_slice;
3637 unsigned int pps_id;
3638 int num_ref_idx_active_override_flag;
3639 unsigned int slice_type, tmp, i, j;
3640 int default_ref_list_done = 0;
3641 int last_pic_structure;
/* non-reference pictures may be dropped without corrupting later frames */
3643 s->dropable= h->nal_ref_idc == 0;
/* FAST flag: use the cheaper 2-tap qpel filters for non-reference pics */
3645 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3646 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3647 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3649 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3650 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3653 first_mb_in_slice= get_ue_golomb(&s->gb);
/* in CHUNKS mode a slice with first_mb==0 starts a new picture */
3655 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3656 h0->current_slice = 0;
3657 if (!s0->first_field)
3658 s->current_picture_ptr= NULL;
3661 slice_type= get_ue_golomb_31(&s->gb);
3663 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
/* slice_type >= 5 means "fixed": all slices of the picture share a type */
3668 h->slice_type_fixed=1;
3670 h->slice_type_fixed=0;
3672 slice_type= golomb_to_pict_type[ slice_type ];
/* default ref list can be reused for I slices and repeated slice types */
3673 if (slice_type == FF_I_TYPE
3674 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3675 default_ref_list_done = 1;
3677 h->slice_type= slice_type;
3678 h->slice_type_nos= slice_type & 3;
3680 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3681 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3682 av_log(h->s.avctx, AV_LOG_ERROR,
3683 "B picture before any references, skipping\n");
/* resolve PPS and its SPS; both must have been parsed already */
3687 pps_id= get_ue_golomb(&s->gb);
3688 if(pps_id>=MAX_PPS_COUNT){
3689 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3692 if(!h0->pps_buffers[pps_id]) {
3693 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3696 h->pps= *h0->pps_buffers[pps_id];
3698 if(!h0->sps_buffers[h->pps.sps_id]) {
3699 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3702 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS scaling lists; rebuild on PPS change */
3704 if(h == h0 && h->dequant_coeff_pps != pps_id){
3705 h->dequant_coeff_pps = pps_id;
3706 init_dequant_tables(h);
/* derive picture geometry from the SPS (mb_height doubles for fields) */
3709 s->mb_width= h->sps.mb_width;
3710 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3712 h->b_stride= s->mb_width*4;
3713 h->b8_stride= s->mb_width*2;
3715 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3716 if(h->sps.frame_mbs_only_flag)
3717 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3719 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
/* mid-stream resolution change: must reinit, which is impossible while
 * other slice threads run */
3721 if (s->context_initialized
3722 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3724 return -1; // width / height changed during parallelized decoding
3726 flush_dpb(s->avctx);
3729 if (!s->context_initialized) {
3731 return -1; // we cant (re-)initialize context during parallel decoding
3732 if (MPV_common_init(s) < 0)
3736 init_scan_tables(h);
/* set up one H264Context per slice thread, sharing the MpegEncContext
 * thread contexts allocated by MPV_common_init() */
3739 for(i = 1; i < s->avctx->thread_count; i++) {
3741 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3742 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3743 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3746 init_scan_tables(c);
3750 for(i = 0; i < s->avctx->thread_count; i++)
3751 if(context_init(h->thread_context[i]) < 0)
3754 s->avctx->width = s->width;
3755 s->avctx->height = s->height;
3756 s->avctx->sample_aspect_ratio= h->sps.sar;
3757 if(!s->avctx->sample_aspect_ratio.den)
3758 s->avctx->sample_aspect_ratio.den = 1;
3760 if(h->sps.timing_info_present_flag){
3761 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* workaround: old x264 builds wrote half the correct time_scale */
3762 if(h->x264_build > 0 && h->x264_build < 44)
3763 s->avctx->time_base.den *= 2;
3764 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3765 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3769 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* field / MBAFF decision for this picture */
3772 h->mb_aff_frame = 0;
3773 last_pic_structure = s0->picture_structure;
3774 if(h->sps.frame_mbs_only_flag){
3775 s->picture_structure= PICT_FRAME;
3777 if(get_bits1(&s->gb)) { //field_pic_flag
3778 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3780 s->picture_structure= PICT_FRAME;
3781 h->mb_aff_frame = h->sps.mb_aff;
3784 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3786 if(h0->current_slice == 0){
/* frame_num gap: synthesize missing reference frames so later inter
 * prediction has something to point at (spec 8.2.5.2 sliding window) */
3787 while(h->frame_num != h->prev_frame_num &&
3788 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3789 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3791 h->prev_frame_num++;
3792 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3793 s->current_picture_ptr->frame_num= h->prev_frame_num;
3794 execute_ref_pic_marking(h, NULL, 0);
3797 /* See if we have a decoded first field looking for a pair... */
3798 if (s0->first_field) {
3799 assert(s0->current_picture_ptr);
3800 assert(s0->current_picture_ptr->data[0]);
3801 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3803 /* figure out if we have a complementary field pair */
3804 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3806 * Previous field is unmatched. Don't display it, but let it
3807 * remain for reference if marked as such.
3809 s0->current_picture_ptr = NULL;
3810 s0->first_field = FIELD_PICTURE;
3813 if (h->nal_ref_idc &&
3814 s0->current_picture_ptr->reference &&
3815 s0->current_picture_ptr->frame_num != h->frame_num) {
3817 * This and previous field were reference, but had
3818 * different frame_nums. Consider this field first in
3819 * pair. Throw away previous field except for reference
3822 s0->first_field = 1;
3823 s0->current_picture_ptr = NULL;
3826 /* Second field in complementary pair */
3827 s0->first_field = 0;
3832 /* Frame or first field in a potentially complementary pair */
3833 assert(!s0->current_picture_ptr);
3834 s0->first_field = FIELD_PICTURE;
/* allocate/start the new frame unless this is the second field */
3837 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3838 s0->first_field = 0;
3845 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3847 assert(s->mb_num == s->mb_width * s->mb_height);
3848 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3849 first_mb_in_slice >= s->mb_num){
3850 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3853 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3854 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
/* bottom field rows are the odd mb rows */
3855 if (s->picture_structure == PICT_BOTTOM_FIELD)
3856 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3857 assert(s->mb_y < s->mb_height);
/* pic_num numbering space: doubled (+1 for current) in field pictures */
3859 if(s->picture_structure==PICT_FRAME){
3860 h->curr_pic_num= h->frame_num;
3861 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3863 h->curr_pic_num= 2*h->frame_num + 1;
3864 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3867 if(h->nal_unit_type == NAL_IDR_SLICE){
3868 get_ue_golomb(&s->gb); /* idr_pic_id */
/* POC-related syntax elements, depending on the SPS poc_type */
3871 if(h->sps.poc_type==0){
3872 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3874 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3875 h->delta_poc_bottom= get_se_golomb(&s->gb);
3879 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3880 h->delta_poc[0]= get_se_golomb(&s->gb);
3882 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3883 h->delta_poc[1]= get_se_golomb(&s->gb);
3888 if(h->pps.redundant_pic_cnt_present){
3889 h->redundant_pic_count= get_ue_golomb(&s->gb);
3892 //set defaults, might be overridden a few lines later
3893 h->ref_count[0]= h->pps.ref_count[0];
3894 h->ref_count[1]= h->pps.ref_count[1];
3896 if(h->slice_type_nos != FF_I_TYPE){
3897 if(h->slice_type_nos == FF_B_TYPE){
3898 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3900 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3902 if(num_ref_idx_active_override_flag){
3903 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3904 if(h->slice_type_nos==FF_B_TYPE)
3905 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* unsigned-wrap trick also catches ref_count==0 */
3907 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3908 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3909 h->ref_count[0]= h->ref_count[1]= 1;
3913 if(h->slice_type_nos == FF_B_TYPE)
3920 if(!default_ref_list_done){
3921 fill_default_ref_list(h);
3924 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
/* wire last/next picture pointers used by the shared MPEG code */
3927 if(h->slice_type_nos!=FF_I_TYPE){
3928 s->last_picture_ptr= &h->ref_list[0][0];
3929 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3931 if(h->slice_type_nos==FF_B_TYPE){
3932 s->next_picture_ptr= &h->ref_list[1][0];
3933 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* weighted prediction: explicit (table in bitstream) or implicit (derived) */
3936 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3937 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3938 pred_weight_table(h);
3939 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3940 implicit_weight_table(h);
3945 decode_ref_pic_marking(h0, &s->gb);
3948 fill_mbaff_ref_list(h);
3950 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3951 direct_dist_scale_factor(h);
3952 direct_ref_list_init(h);
3954 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3955 tmp = get_ue_golomb_31(&s->gb);
3957 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3960 h->cabac_init_idc= tmp;
3963 h->last_qscale_diff = 0;
3964 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3966 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3970 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3971 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3972 //FIXME qscale / qp ... stuff
3973 if(h->slice_type == FF_SP_TYPE){
3974 get_bits1(&s->gb); /* sp_for_switch_flag */
3976 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3977 get_se_golomb(&s->gb); /* slice_qs_delta */
3980 h->deblocking_filter = 1;
3981 h->slice_alpha_c0_offset = 0;
3982 h->slice_beta_offset = 0;
3983 if( h->pps.deblocking_filter_parameters_present ) {
3984 tmp= get_ue_golomb_31(&s->gb);
3986 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
/* bitstream: 0=on, 1=off, 2=on-but-not-across-slices;
 * internally 0 and 1 are swapped (1=on, 0=off) */
3989 h->deblocking_filter= tmp;
3990 if(h->deblocking_filter < 2)
3991 h->deblocking_filter^= 1; // 1<->0
3993 if( h->deblocking_filter ) {
3994 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3995 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
/* user-requested loop-filter skipping */
3999 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4000 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4001 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4002 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4003 h->deblocking_filter= 0;
/* cross-slice deblocking cannot be parallelized; either cheat (FAST) or
 * fall back to sequential decoding */
4005 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4006 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4007 /* Cheat slightly for speed:
4008 Do not bother to deblock across slices. */
4009 h->deblocking_filter = 2;
4011 h0->max_contexts = 1;
4012 if(!h0->single_decode_warning) {
4013 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4014 h0->single_decode_warning = 1;
4017 return 1; // deblocking switched inside frame
/* FMO slice groups are unsupported; the '?' bit-width placeholder below is
 * presumably inside an elided #if 0 block — TODO confirm */
4022 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4023 slice_group_change_cycle= get_bits(&s->gb, ?);
4026 h0->last_slice_type = slice_type;
4027 h->slice_num = ++h0->current_slice;
4028 if(h->slice_num >= MAX_SLICES){
4029 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* build the per-slice ref->frame mapping used by the deblocker:
 * 4*frame_num + field bits identifies a reference uniquely */
4033 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4037 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4038 +(h->ref_list[j][i].reference&3);
4041 for(i=16; i<48; i++)
4042 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4043 +(h->ref_list[j][i].reference&3);
4046 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4047 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4049 s->avctx->refs= h->sps.ref_frame_count;
4051 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4052 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4054 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4056 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4057 pps_id, h->frame_num,
4058 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4059 h->ref_count[0], h->ref_count[1],
4061 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4063 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4064 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 bit (H.264 spec 9.2.2.1). Uses the bitstream-reader macro API so
 * the cache can be examined without consuming bits until the length is known.
 */
4074 static inline int get_level_prefix(GetBitContext *gb){
4078 OPEN_READER(re, gb);
4079 UPDATE_CACHE(re, gb);
4080 buf=GET_CACHE(re, gb);
/* position of the first set bit from the MSB side */
4082 log= 32 - av_log2(buf);
4084 print_bin(buf>>(32-log), log);
4085 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zeros plus the terminating 1 bit */
4088 LAST_SKIP_BITS(re, gb, log);
4089 CLOSE_READER(re, gb);
/**
 * Returns nonzero if the current macroblock's sub-partitioning permits the
 * 8x8 transform: no sub_mb_type may carry a sub-8x8 partition flag. With
 * direct_8x8_inference_flag set, DIRECT sub-blocks are also acceptable.
 * The four 16-bit sub_mb_type entries are tested at once via a 64-bit mask.
 */
4094 static inline int get_dct8x8_allowed(H264Context *h){
4095 if(h->sps.direct_8x8_inference_flag)
4096 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4098 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4102 * decodes a residual block.
4103 * @param n block index
4104 * @param scantable scantable
4105 * @param max_coeff number of coefficients in the block
4106 * @return <0 if an error occurred
/**
 * Decodes one CAVLC-coded residual block into 'block'.
 * @param gb        bitstream reader to pull VLC codes from
 * @param block     output coefficient array (written at scantable positions)
 * @param n         block index (CHROMA_DC/LUMA_DC selects special handling)
 * @param scantable zigzag/field scan order for coefficient placement
 * @param qmul      dequant table, or NULL to store raw levels (DC path)
 * @param max_coeff number of coefficients in the block
 * @return <0 on error (corrupt stream)
 * NOTE(review): several interior lines (else branches, closing braces,
 * early returns) are elided from this excerpt.
 */
4108 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4109     MpegEncContext * const s = &h->s;
// Maps predicted nnz (0..16) to one of the 4 coeff_token VLC tables.
4110     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4112     int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4114     //FIXME put trailing_ones into the context
// --- coeff_token: packs total_coeff (high bits) and trailing_ones (low 2 bits)
4116     if(n == CHROMA_DC_BLOCK_INDEX){
4117         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4118         total_coeff= coeff_token>>2;
4120     if(n == LUMA_DC_BLOCK_INDEX){
// Table choice is context-adaptive on the neighbours' nonzero counts.
4121         total_coeff= pred_non_zero_count(h, 0);
4122         coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4123         total_coeff= coeff_token>>2;
4125         total_coeff= pred_non_zero_count(h, n);
4126         coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4127         total_coeff= coeff_token>>2;
4128         h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4132     //FIXME set last_non_zero?
// Guard against a corrupt stream signalling more coefficients than fit.
4136     if(total_coeff > (unsigned)max_coeff) {
4137         av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4141     trailing_ones= coeff_token&3;
4142     tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4143     assert(total_coeff<=16);
// --- trailing ones: up to 3 sign bits, decoded speculatively from 3 peeked bits
4145     i = show_bits(gb, 3);
4146     skip_bits(gb, trailing_ones);
// 1-((bit)<<1) maps sign bit 0 -> +1, 1 -> -1 for each of the 3 slots.
4147     level[0] = 1-((i&4)>>1);
4148     level[1] = 1-((i&2)    );
4149     level[2] = 1-((i&1)<<1);
// --- first non-trailing-one level (suffix_length is 0 or 1 here)
4151     if(trailing_ones<total_coeff) {
4153         int suffix_length = total_coeff > 10 && trailing_ones < 3;
// Fast path: LEVEL_TAB_BITS-wide LUT; entry >= 100 means "escape, prefix = entry-100".
4154         int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4155         int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4157         skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4158         if(level_code >= 100){
4159             prefix= level_code - 100;
4160             if(prefix == LEVEL_TAB_BITS)
4161                 prefix += get_level_prefix(gb);
4163             //first coefficient has suffix_length equal to 0 or 1
4164             if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4166                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4168                 level_code= (prefix<<suffix_length); //part
4169             }else if(prefix==14){
4171                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4173                 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape code with (prefix-3)-bit suffix.
4175                 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4176                 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4178                     level_code += (1<<(prefix-3))-4096;
// If all 3 trailing ones were present the level cannot be +/-1, so shift range.
4181             if(trailing_ones < 3) level_code += 2;
// Branchless zigzag-to-signed: even codes -> positive, odd -> negative.
4184             mask= -(level_code&1);
4185             level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4187             if(trailing_ones < 3) level_code += (level_code>>31)|1;
// Adapt suffix_length for subsequent levels when magnitude is large enough.
4190             if(level_code + 3U > 6U)
4192             level[trailing_ones]= level_code;
4195         //remaining coefficients have suffix_length > 0
4196         for(i=trailing_ones+1;i<total_coeff;i++) {
// Threshold at which suffix_length is incremented, per suffix_length.
4197             static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4198             int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4199             level_code= cavlc_level_tab[suffix_length][bitsi][0];
4201             skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4202             if(level_code >= 100){
4203                 prefix= level_code - 100;
4204                 if(prefix == LEVEL_TAB_BITS){
4205                     prefix += get_level_prefix(gb);
4208                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4210                     level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4212                         level_code += (1<<(prefix-3))-4096;
4214                 mask= -(level_code&1);
4215                 level_code= (((2+level_code)>>1) ^ mask) - mask;
4217             level[i]= level_code;
// |level_code| > suffix_limit -> grow suffix_length (unsigned-overflow-safe form).
4219             if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: how many zero coefficients precede the last nonzero one
4224     if(total_coeff == max_coeff)
4227         if(n == CHROMA_DC_BLOCK_INDEX)
4228             zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4230             zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels back-to-front using run_before between coefficients
4233     coeff_num = zeros_left + total_coeff - 1;
4234     j = scantable[coeff_num];
// qmul==NULL path: store raw levels (DC blocks, dequantized later).
4236         block[j] = level[0];
4237         for(i=1;i<total_coeff;i++) {
4240             else if(zeros_left < 7){
4241                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4243                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4245             zeros_left -= run_before;
4246             coeff_num -= 1 + run_before;
4247             j= scantable[ coeff_num ];
// qmul path: dequantize with rounding while storing.
4252         block[j] = (level[0] * qmul[j] + 32)>>6;
4253         for(i=1;i<total_coeff;i++) {
4256             else if(zeros_left < 7){
4257                 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4259                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4261             zeros_left -= run_before;
4262             coeff_num -= 1 + run_before;
4263             j= scantable[ coeff_num ];
4265             block[j]= (level[i] * qmul[j] + 32)>>6;
// Corrupt stream: runs consumed more zeros than total_zeros declared.
4270         av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair
 * from the left neighbour, falling back to the top neighbour; both must
 * belong to the same slice to be used.
 */
4277 static void predict_field_decoding_flag(H264Context *h){
4278     MpegEncContext * const s = &h->s;
4279     const int mb_xy= h->mb_xy;
// Prefer the left neighbour's mb_type, then the one above.
4280     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4281                 ? s->current_picture.mb_type[mb_xy-1]
4282                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4283                 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// Inherit field/frame coding from the chosen neighbour.
4285     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4289 * decodes a P_SKIP or B_SKIP macroblock
/**
 * Decodes a P_SKIP or B_SKIP macroblock: no residual, motion either
 * derived via direct prediction (B) or P-skip motion prediction (P).
 * NOTE(review): mb_type initialization and some closing braces are elided
 * from this excerpt.
 */
4291 static void decode_mb_skip(H264Context *h){
4292     MpegEncContext * const s = &h->s;
4293     const int mb_xy= h->mb_xy;
// Skipped MBs have no coded coefficients.
4296     memset(h->non_zero_count[mb_xy], 0, 16);
4297     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4300         mb_type|= MB_TYPE_INTERLACED;
4302     if( h->slice_type_nos == FF_B_TYPE )
4304         // just for fill_caches. pred_direct_motion will set the real mb_type
4305         mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4307         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4308         pred_direct_motion(h, &mb_type);
4309         mb_type|= MB_TYPE_SKIP;
// P_SKIP: a single 16x16 list-0 partition with the predicted skip MV.
4314         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4316         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4317         pred_pskip_motion(h, &mx, &my);
// Reference index 0 everywhere, uniform (mx,my) motion vector.
4318         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4319         fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4322     write_back_motion(h, mb_type);
4323     s->current_picture.mb_type[mb_xy]= mb_type;
4324     s->current_picture.qscale_table[mb_xy]= s->qscale;
4325     h->slice_table[ mb_xy ]= h->slice_num;
4326     h->prev_mb_skipped= 1;
4330 * decodes a macroblock
4331 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/**
 * Decodes one macroblock in CAVLC entropy mode:
 * skip-run handling, mb_type, intra prediction modes or inter motion data,
 * coded block pattern, dquant, and all residual blocks.
 * @return 0 on success, <0 on error (per the comment block above this
 *         function: AC_ERROR / DC_ERROR / MV_ERROR).
 * NOTE(review): this excerpt is heavily elided — many else branches,
 * closing braces, early returns and some declarations are not visible.
 */
4333 static int decode_mb_cavlc(H264Context *h){
4334     MpegEncContext * const s = &h->s;
4336     int partition_count;
4337     unsigned int mb_type, cbp;
4338     int dct8x8_allowed= h->pps.transform_8x8_mode;
4340     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4342     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4343     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip run (P/B slices only): consume one MB of the pending skip run
4345     if(h->slice_type_nos != FF_I_TYPE){
4346         if(s->mb_skip_run==-1)
4347             s->mb_skip_run= get_ue_golomb(&s->gb);
4349         if (s->mb_skip_run--) {
// MBAFF: on the top MB of a pair the field flag is either read or predicted.
4350             if(FRAME_MBAFF && (s->mb_y&1) == 0){
4351                 if(s->mb_skip_run==0)
4352                     h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4354                     predict_field_decoding_flag(h);
4361         if( (s->mb_y&1) == 0 )
4362             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4365     h->prev_mb_skipped= 0;
// --- mb_type: slice-type-dependent table lookup; large values mean intra
4367     mb_type= get_ue_golomb(&s->gb);
4368     if(h->slice_type_nos == FF_B_TYPE){
4370             partition_count= b_mb_type_info[mb_type].partition_count;
4371             mb_type= b_mb_type_info[mb_type].type;
4374             goto decode_intra_mb;
4376     }else if(h->slice_type_nos == FF_P_TYPE){
4378             partition_count= p_mb_type_info[mb_type].partition_count;
4379             mb_type= p_mb_type_info[mb_type].type;
4382             goto decode_intra_mb;
4385        assert(h->slice_type_nos == FF_I_TYPE);
4386         if(h->slice_type == FF_SI_TYPE && mb_type)
4390             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4394         cbp= i_mb_type_info[mb_type].cbp;
4395         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4396         mb_type= i_mb_type_info[mb_type].type;
4400         mb_type |= MB_TYPE_INTERLACED;
4402     h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, bypassing prediction and transform entirely
4404     if(IS_INTRA_PCM(mb_type)){
4407         // We assume these blocks are very rare so we do not optimize it.
4408         align_get_bits(&s->gb);
4410         // The pixels are stored in the same order as levels in h->mb array.
4411         for(x=0; x < (CHROMA ? 384 : 256); x++){
4412             ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4415         // In deblocking, the quantizer is 0
4416         s->current_picture.qscale_table[mb_xy]= 0;
4417         // All coeffs are present
4418         memset(h->non_zero_count[mb_xy], 16, 16);
4420         s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address twice as many field references.
4425         h->ref_count[0] <<= 1;
4426         h->ref_count[1] <<= 1;
4429     fill_caches(h, mb_type, 0);
// --- intra macroblocks: read prediction modes
4432     if(IS_INTRA(mb_type)){
4434 //            init_top_left_availability(h);
4435         if(IS_INTRA4x4(mb_type)){
4438             if(dct8x8_allowed && get_bits1(&s->gb)){
4439                 mb_type |= MB_TYPE_8x8DCT;
4443 //                fill_intra4x4_pred_table(h);
4444             for(i=0; i<16; i+=di){
4445                 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag==0: 3-bit remainder replaces the prediction.
4447                 if(!get_bits1(&s->gb)){
4448                     const int rem_mode= get_bits(&s->gb, 3);
4449                     mode = rem_mode + (rem_mode >= mode);
// 8x8 transform: one mode covers a 2x2 group of 4x4 cache cells.
4453                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4455                     h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4457             write_back_intra_pred_mode(h);
4458             if( check_intra4x4_pred_mode(h) < 0)
4461             h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4462             if(h->intra16x16_pred_mode < 0)
4466             pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4469             h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: per-8x8 sub_mb_type, refs, then MVs
4471     }else if(partition_count==4){
4472         int i, j, sub_partition_count[4], list, ref[2][4];
4474         if(h->slice_type_nos == FF_B_TYPE){
4476                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4477                 if(h->sub_mb_type[i] >=13){
4478                     av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4481                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4482                 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4484             if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4485                 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4486                 pred_direct_motion(h, &mb_type);
// Mark inner edges unavailable so they don't pollute later MV prediction.
4487                 h->ref_cache[0][scan8[4]] =
4488                 h->ref_cache[1][scan8[4]] =
4489                 h->ref_cache[0][scan8[12]] =
4490                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4493             assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4495                 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4496                 if(h->sub_mb_type[i] >=4){
4497                     av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4500                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4501                 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Read per-8x8 reference indices (1 ref: implicit 0; 2 refs: one bit).
4505         for(list=0; list<h->list_count; list++){
4506             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4508                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4509                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4513                     }else if(ref_count == 2){
4514                         tmp= get_bits1(&s->gb)^1;
4516                         tmp= get_ue_golomb_31(&s->gb);
4518                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4531             dct8x8_allowed = get_dct8x8_allowed(h);
// Read motion vector differences and scatter per sub-partition shape.
4533         for(list=0; list<h->list_count; list++){
4535                 if(IS_DIRECT(h->sub_mb_type[i])) {
4536                     h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4539                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4540                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4542                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4543                     const int sub_mb_type= h->sub_mb_type[i];
4544                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4545                     for(j=0; j<sub_partition_count[i]; j++){
4547                         const int index= 4*i + block_width*j;
4548                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4549                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4550                         mx += get_se_golomb(&s->gb);
4551                         my += get_se_golomb(&s->gb);
4552                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the MV across the cache cells the sub-partition covers.
4554                         if(IS_SUB_8X8(sub_mb_type)){
4556                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4558                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4559                         }else if(IS_SUB_8X4(sub_mb_type)){
4560                             mv_cache[ 1 ][0]= mx;
4561                             mv_cache[ 1 ][1]= my;
4562                         }else if(IS_SUB_4X8(sub_mb_type)){
4563                             mv_cache[ 8 ][0]= mx;
4564                             mv_cache[ 8 ][1]= my;
4566                             mv_cache[ 0 ][0]= mx;
4567                             mv_cache[ 0 ][1]= my;
4570                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- B direct 16x16
4576     }else if(IS_DIRECT(mb_type)){
4577         pred_direct_motion(h, &mb_type);
4578         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 partitions: refs then MVDs per partition
4580         int list, mx, my, i;
4581         //FIXME we should set ref_idx_l? to 0 if we use that later ...
4582         if(IS_16X16(mb_type)){
4584             for(list=0; list<h->list_count; list++){
4585                 if(IS_DIR(mb_type, 0, list)){
4586                     if(h->ref_count[list]==1){
4588                     }else if(h->ref_count[list]==2){
4589                         val= get_bits1(&s->gb)^1;
4591                         val= get_ue_golomb_31(&s->gb);
4592                         if(val >= h->ref_count[list]){
4593                             av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4598                     val= LIST_NOT_USED&0xFF;
4599                 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4601             for(list=0; list<h->list_count; list++){
4603                 if(IS_DIR(mb_type, 0, list)){
4604                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4605                     mx += get_se_golomb(&s->gb);
4606                     my += get_se_golomb(&s->gb);
4607                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4609                     val= pack16to32(mx,my);
4612                 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4615         else if(IS_16X8(mb_type)){
4616             for(list=0; list<h->list_count; list++){
// i indexes the two 16x8 halves (top, bottom).
4619                     if(IS_DIR(mb_type, i, list)){
4620                         if(h->ref_count[list] == 1){
4622                         }else if(h->ref_count[list] == 2){
4623                             val= get_bits1(&s->gb)^1;
4625                             val= get_ue_golomb_31(&s->gb);
4626                             if(val >= h->ref_count[list]){
4627                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4632                         val= LIST_NOT_USED&0xFF;
4633                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4636             for(list=0; list<h->list_count; list++){
4639                     if(IS_DIR(mb_type, i, list)){
4640                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4641                         mx += get_se_golomb(&s->gb);
4642                         my += get_se_golomb(&s->gb);
4643                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4645                         val= pack16to32(mx,my);
4648                     fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4652             assert(IS_8X16(mb_type));
4653             for(list=0; list<h->list_count; list++){
// i indexes the two 8x16 halves (left, right).
4656                     if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4657                         if(h->ref_count[list]==1){
4659                         }else if(h->ref_count[list]==2){
4660                             val= get_bits1(&s->gb)^1;
4662                             val= get_ue_golomb_31(&s->gb);
4663                             if(val >= h->ref_count[list]){
4664                                 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4669                         val= LIST_NOT_USED&0xFF;
4670                     fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4673             for(list=0; list<h->list_count; list++){
4676                     if(IS_DIR(mb_type, i, list)){
4677                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4678                         mx += get_se_golomb(&s->gb);
4679                         my += get_se_golomb(&s->gb);
4680                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4682                         val= pack16to32(mx,my);
4685                     fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4691     if(IS_INTER(mb_type))
4692         write_back_motion(h, mb_type);
// --- coded block pattern (except I16x16, where cbp comes from mb_type)
4694     if(!IS_INTRA16x16(mb_type)){
4695         cbp= get_ue_golomb(&s->gb);
4697             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4702             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4703             else                     cbp= golomb_to_inter_cbp   [cbp];
// Monochrome ("gray") variants when no chroma is coded.
4705             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4706             else                     cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag: only signalled for inter MBs with luma residual.
4711     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4712         if(get_bits1(&s->gb)){
4713             mb_type |= MB_TYPE_8x8DCT;
4714     h->cbp_table[mb_xy]= cbp;
4717     s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: dquant then per-block CAVLC coefficient decoding
4719     if(cbp || IS_INTRA16x16(mb_type)){
4720         int i8x8, i4x4, chroma_idx;
4722         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4723         const uint8_t *scan, *scan8x8, *dc_scan;
4725 //        fill_non_zero_count_cache(h);
// Field vs frame MBs use different scan orders.
4727         if(IS_INTERLACED(mb_type)){
4728             scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4729             scan= s->qscale ? h->field_scan : h->field_scan_q0;
4730             dc_scan= luma_dc_field_scan;
4732             scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4733             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4734             dc_scan= luma_dc_zigzag_scan;
4737         dquant= get_se_golomb(&s->gb);
4739         if( dquant > 25 || dquant < -26 ){
4740             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// QP wraps modulo 52 per the spec.
4744         s->qscale += dquant;
4745         if(((unsigned)s->qscale) > 51){
4746             if(s->qscale<0) s->qscale+= 52;
4747             else            s->qscale-= 52;
4750         h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4751         h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4752         if(IS_INTRA16x16(mb_type)){
// I16x16: separate DC block, then 15-coefficient AC blocks.
4753             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4754                 return -1; //FIXME continue if partitioned and other return -1 too
4757             assert((cbp&15) == 0 || (cbp&15) == 15);
4760                 for(i8x8=0; i8x8<4; i8x8++){
4761                     for(i4x4=0; i4x4<4; i4x4++){
4762                         const int index= i4x4 + 4*i8x8;
4763                         if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4769                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-I16x16 luma: 4x4 or 8x8 transform blocks per cbp bits.
4772             for(i8x8=0; i8x8<4; i8x8++){
4773                 if(cbp & (1<<i8x8)){
4774                     if(IS_8x8DCT(mb_type)){
4775                         DCTELEM *buf = &h->mb[64*i8x8];
4777                         for(i4x4=0; i4x4<4; i4x4++){
4778                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4779                                                 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// Aggregate the four 4x4 nnz counts into the 8x8 block's entry.
4782                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4783                         nnz[0] += nnz[1] + nnz[8] + nnz[9];
4785                         for(i4x4=0; i4x4<4; i4x4++){
4786                             const int index= i4x4 + 4*i8x8;
4788                             if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4794                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4795                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks first, then AC blocks if signalled by cbp.
4801             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4802                 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4808             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4809                 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4810                 for(i4x4=0; i4x4<4; i4x4++){
4811                     const int index= 16 + 4*chroma_idx + i4x4;
4812                     if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC coded: clear the chroma nnz cache entries.
4818             uint8_t * const nnz= &h->non_zero_count_cache[0];
4819             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4820             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear the whole nnz cache.
4823         uint8_t * const nnz= &h->non_zero_count_cache[0];
4824         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4825         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4826         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4828     s->current_picture.qscale_table[mb_xy]= s->qscale;
4829     write_back_non_zero_count(h);
// Undo the MBAFF field ref_count doubling done earlier.
4832         h->ref_count[0] >>= 1;
4833         h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (CABAC).  The context index (70..72) is
 * derived from whether the left and top MB pairs in the same slice are
 * field-coded.
 */
4839 static int decode_cabac_field_decoding_flag(H264Context *h) {
4840     MpegEncContext * const s = &h->s;
4841     const int mb_x = s->mb_x;
// Address the top MB of the current pair (y rounded down to even).
4842     const int mb_y = s->mb_y & ~1;
4843     const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4844     const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4846     unsigned int ctx = 0;
4848     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4851     if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4855     return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type (CABAC): returns 0 for I_4x4, 25 for I_PCM,
 * or 1..24 for the I_16x16 variants (encoding cbp_luma, cbp_chroma and
 * the prediction mode).
 * @param ctx_base    base index into cabac_state for this slice type
 * @param intra_slice nonzero in I slices (neighbour-derived ctx is used)
 */
4858 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4859     uint8_t *state= &h->cabac_state[ctx_base];
4863         MpegEncContext * const s = &h->s;
4864         const int mba_xy = h->left_mb_xy[0];
4865         const int mbb_xy = h->top_mb_xy;
// ctx counts same-slice neighbours that are not I_4x4.
4867         if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4869         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4871         if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4872             return 0;   /* I4x4 */
4875         if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4876             return 0;   /* I4x4 */
4879     if( get_cabac_terminate( &h->cabac ) )
4880         return 25;  /* PCM */
// Assemble the I16x16 variant index from successive context-coded bins.
4882     mb_type = 1; /* I16x16 */
4883     mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4884     if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4885         mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4886     mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4887     mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type for a B slice (CABAC): 0 = B_Direct_16x16, 1..21 the
 * inter B types, 22 = B_8x8, and 23+ the intra types (delegated to
 * decode_cabac_intra_mb_type).
 */
4891 static int decode_cabac_mb_type_b( H264Context *h ) {
4892     MpegEncContext * const s = &h->s;
4894     const int mba_xy = h->left_mb_xy[0];
4895     const int mbb_xy = h->top_mb_xy;
4898     assert(h->slice_type_nos == FF_B_TYPE);
// ctx counts same-slice neighbours that are not B_Direct.
4900     if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4902     if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4905     if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4906         return 0; /* B_Direct_16x16 */
4908     if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4909         return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bin suffix selects among the remaining B types.
4912     bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4913     bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4914     bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4915     bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4917         return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4918     else if( bits == 13 ) {
4919         return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4920     } else if( bits == 14 )
4921         return 11; /* B_L1_L0_8x16 */
4922     else if( bits == 15 )
4923         return 22; /* B_8x8 */
// bits 8..12: one extra bin disambiguates the bi-predictive 16x8/8x16 types.
4925     bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4926     return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * Decodes mb_skip_flag (CABAC).  The context depends on whether the left
 * and top same-slice neighbours are themselves skipped; B slices use a
 * separate context group (offset applied to ctx before the final read).
 * NOTE(review): the mba_xy initialisation lines are elided from this excerpt.
 */
4929 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4930     MpegEncContext * const s = &h->s;
4934     if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4935         int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Field/frame mismatch with the left pair shifts which MB of it is "left".
4938                 && h->slice_table[mba_xy] == h->slice_num
4939                 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4940             mba_xy += s->mb_stride;
4942             mbb_xy = mb_xy - s->mb_stride;
4944                     && h->slice_table[mbb_xy] == h->slice_num
4945                     && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4946                 mbb_xy -= s->mb_stride;
4948             mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4950         int mb_xy = h->mb_xy;
// Non-MBAFF: the MB above is one (frame) or two (field picture) rows up.
4952         mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
// ctx = number of same-slice neighbours that are NOT skipped (0..2).
4955     if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4957     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4960     if( h->slice_type_nos == FF_B_TYPE )
4962     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one intra4x4 prediction mode (CABAC).  A first bin selects
 * "use predicted mode"; otherwise a 3-bit fixed-length remainder is read
 * and adjusted so it never equals the predicted mode.
 */
4965 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4968     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// rem_intra4x4_pred_mode: 3 bypass-style bins, LSB first.
4971     mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4972     mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4973     mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Skip over the predicted mode so all 9 modes stay reachable.
4975     if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode (CABAC): a truncated-unary code of up to
 * 3 bins; the first bin's context depends on whether the neighbours used a
 * nonzero chroma mode.
 */
4981 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4982     const int mba_xy = h->left_mb_xy[0];
4983     const int mbb_xy = h->top_mb_xy;
4987     /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4988     if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4991     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4994     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Remaining bins share a single context (64+3).
4997     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4999     if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma coded_block_pattern (CABAC), one bin per 8x8
 * block.  Each bin's context is built from the left/top neighbour bits
 * (from the neighbouring MB's cbp, or from already-decoded bits of this MB).
 * Neighbours outside the slice yield -1, whose tested bits read as set.
 */
5005 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5006     int cbp_b, cbp_a, ctx, cbp = 0;
5008     cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5009     cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp : -1;
5011     ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5012     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5013     ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
5014     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5015     ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
5016     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5017     ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
5018     cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma part of coded_block_pattern (CABAC): returns 0 (no
 * chroma), 1 (DC only) or 2 (DC+AC).  Contexts derive from the neighbours'
 * chroma cbp values.
 */
5021 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5025     cbp_a = (h->left_cbp>>4)&0x03;
5026     cbp_b = (h-> top_cbp>>4)&0x03;
5029     if( cbp_a > 0 ) ctx++;
5030     if( cbp_b > 0 ) ctx += 2;
5031     if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin distinguishes DC-only from DC+AC; ctx from neighbours == 2.
5035     if( cbp_a == 2 ) ctx++;
5036     if( cbp_b == 2 ) ctx += 2;
5037     return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta (CABAC): a unary magnitude whose codeword value is
 * mapped back to a signed delta (odd -> positive, even -> negative).
 * NOTE(review): the ctx-update lines inside the loop are elided here.
 */
5039 static int decode_cabac_mb_dqp( H264Context *h) {
// First-bin context: was the previous MB's qp delta nonzero?
5040     int ctx= h->last_qscale_diff != 0;
5043     while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5046         if(val > 102) //prevent infinite loop
5051         return (val + 1)>>1 ;
5053         return -((val + 1)>>1);
/**
 * Decodes sub_mb_type for one 8x8 block of a P macroblock (CABAC),
 * via a small binary tree over contexts 21..23.
 * NOTE(review): the tree's return values are elided from this excerpt.
 */
5055 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5056     if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5058     if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5060     if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes sub_mb_type for one 8x8 block of a B macroblock (CABAC):
 * 0 = B_Direct_8x8, then increasing indices for the L0/L1/Bi sub-types,
 * via a binary tree over contexts 36..39.
 */
5064 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5066     if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5067         return 0;   /* B_Direct_8x8 */
5068     if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5069         return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5071     if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5072         if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5073             return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining types: two more bins, all sharing context 39.
5076     type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5077     type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag (CABAC); context 399..401 selected by
 * how many neighbours used the 8x8 transform (h->neighbor_transform_size).
 */
5081 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5082     return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for list 'list', block 'n' (CABAC): unary code over
 * contexts 54+, first-bin context derived from the left/top cached
 * reference indices (direct-predicted neighbours in B slices count as 0).
 * NOTE(review): ctx derivation and loop body lines are partly elided here.
 */
5085 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5086     int refa = h->ref_cache[list][scan8[n] - 1];
5087     int refb = h->ref_cache[list][scan8[n] - 8];
5091     if( h->slice_type_nos == FF_B_TYPE) {
5092         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5094         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5103     while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity bound: a valid ref_idx can never reach 32.
5106         if(ref >= 32 /*h->ref_list[list]*/){
/**
 * Decodes one motion-vector-difference component (CABAC):
 * context-coded unary prefix (up to 9), then an exp-Golomb-style bypass
 * suffix, then a bypass sign bit.
 * @param l 0 = horizontal component, 1 = vertical component
 */
5113 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
// Context from the magnitude of the neighbouring cached MVDs.
5114     int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5115                abs( h->mvd_cache[list][scan8[n] - 8][l] );
5116     int ctxbase = (l == 0) ? 40 : 47;
5118     int ctx = (amvd>2) + (amvd>32);
5120     if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5125     while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Escape: exp-Golomb suffix read in bypass mode.
5133         while( get_cabac_bypass( &h->cabac ) ) {
5137                 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5142             if( get_cabac_bypass( &h->cabac ) )
// Sign bit: returns +mvd or -mvd.
5146     return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag CABAC context for block category 'cat' and
 * block index 'idx': ctx = nza-set + 2*nzb-set + 4*cat, where nza/nzb are
 * the left/top neighbours' coded flags (from cbp bits for DC/chroma-DC,
 * from the nnz cache for AC/luma blocks).
 * NOTE(review): the cat-dispatch and final ctx assembly lines are elided.
 */
5149 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma-DC flag lives in bit 8 of the packed cbp.
5155             nza = h->left_cbp&0x100;
5156             nzb = h-> top_cbp&0x100;
// Chroma-DC flags live in bits 6..7 of the packed cbp.
5158             nza = (h->left_cbp>>(6+idx))&0x01;
5159             nzb = (h-> top_cbp>>(6+idx))&0x01;
5162         assert(cat == 1 || cat == 2 || cat == 4);
// AC blocks: neighbour nonzero counts from the 8x5 nnz cache.
5163         nza = h->non_zero_count_cache[scan8[idx] - 1];
5164         nzb = h->non_zero_count_cache[scan8[idx] - 8];
5173     return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the last_significant_coeff_flag
 * context offset used by CABAC 8x8 residual decoding; declared as an
 * ASM-visible constant because the x86 significance decoder reads it. */
5176 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5177     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5178     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5179     3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5180     5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5183 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5184 static const int significant_coeff_flag_offset[2][6] = {
5185 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5186 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5188 static const int last_coeff_flag_offset[2][6] = {
5189 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5190 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5192 static const int coeff_abs_level_m1_offset[6] = {
5193 227+0, 227+10, 227+20, 227+30, 227+39, 426
5195 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5196 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5197 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5198 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5199 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5200 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5201 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5202 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5203 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5205 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5206 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5207 * map node ctx => cabac ctx for level=1 */
5208 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5209 /* map node ctx => cabac ctx for level>1 */
5210 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5211 static const uint8_t coeff_abs_level_transition[2][8] = {
5212 /* update node ctx after decoding a level=1 */
5213 { 1, 2, 3, 3, 4, 5, 6, 7 },
5214 /* update node ctx after decoding a level>1 */
5215 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* NOTE(review): this span is the interior of decode_cabac_residual_internal()
 * — the function header and a number of intermediate lines are elided in this
 * listing, so the code is kept byte-identical and only comments are added.
 * It decodes one CABAC-coded residual block: coded_block_flag, the
 * significance map, then the coefficient levels and signs. */
5221     int coeff_count = 0;
5224     uint8_t *significant_coeff_ctx_base;
5225     uint8_t *last_coeff_ctx_base;
5226     uint8_t *abs_level_m1_ctx_base;
/* Work on a local copy of the CABAC state so the compiler can keep the hot
 * fields (range/low/bytestream) in registers; the copy is written back to
 * h->cabac before every return. */
5229 #define CABAC_ON_STACK
5231 #ifdef CABAC_ON_STACK
5234     cc.range     = h->cabac.range;
5235     cc.low       = h->cabac.low;
5236     cc.bytestream= h->cabac.bytestream;
5238 #define CC &h->cabac
/* Block-category convention used throughout this decoder: */
5242     /* cat: 0-> DC 16x16  n = 0
5243      *      1-> AC 16x16  n = luma4x4idx
5244      *      2-> Luma4x4   n = luma4x4idx
5245      *      3-> DC Chroma n = iCbCr
5246      *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5247      *      5-> Luma8x8   n = 4 * luma8x8idx
/* coded_block_flag: absent for 8x8 luma (cat 5), where it is derived from
 * the cbp instead.  A zero flag means the whole block is empty. */
5250     /* read coded block flag */
5251     if( is_dc || cat != 5 ) {
5252         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5254                 h->non_zero_count_cache[scan8[n]] = 0;
/* Early exit: flush the local CABAC state back before returning. */
5256 #ifdef CABAC_ON_STACK
5257             h->cabac.range     = cc.range     ;
5258             h->cabac.low       = cc.low       ;
5259             h->cabac.bytestream= cc.bytestream;
/* Select the context sets for this block category (field/frame dependent
 * for the significance and last-coefficient maps). */
5265     significant_coeff_ctx_base = h->cabac_state
5266         + significant_coeff_flag_offset[MB_FIELD][cat];
5267     last_coeff_ctx_base = h->cabac_state
5268         + last_coeff_flag_offset[MB_FIELD][cat];
5269     abs_level_m1_ctx_base = h->cabac_state
5270         + coeff_abs_level_m1_offset[cat];
/* Significance map: for each scan position decode significant_coeff_flag,
 * and for each significant one decode last_significant_coeff_flag. */
5272     if( !is_dc && cat == 5 ) {
5273 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5274         for(last= 0; last < coefs; last++) { \
5275             uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5276             if( get_cabac( CC, sig_ctx )) { \
5277                 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5278                 index[coeff_count++] = last; \
5279                 if( get_cabac( CC, last_ctx ) ) { \
/* The final scan position has no significance flag: it is implicitly
 * significant when reached. */
5285         if( last == max_coeff -1 ) {\
5286             index[coeff_count++] = last;\
5288         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths for the significance decode when available. */
5289 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5290         coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5292             coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5294         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5296         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* coded_block_flag was nonzero, so at least one coefficient must exist. */
5299     assert(coeff_count > 0);
/* Record the non-zero status in cbp_table / non_zero_count_cache depending
 * on block category. */
5303             h->cbp_table[h->mb_xy] |= 0x100;
5305             h->cbp_table[h->mb_xy] |= 0x40 << n;
5308             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5310             assert( cat == 1 || cat == 2 || cat == 4 );
5311             h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level/sign decoding, iterating the significant positions in reverse scan
 * order; node_ctx tracks the abs-level state machine (tables above). */
5316     uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5318         int j= scantable[index[--coeff_count]];
5320         if( get_cabac( CC, ctx ) == 0 ) {
5321             node_ctx = coeff_abs_level_transition[0][node_ctx];
/* |level| == 1: sign only; DC blocks skip dequantization (qmul == NULL). */
5323                 block[j] = get_cabac_bypass_sign( CC, -1);
5325                 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5329             ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5330             node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level| > 1: unary prefix up to 14, then an Exp-Golomb-style bypass
 * suffix for the escape values. */
5332             while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5336             if( coeff_abs >= 15 ) {
5338                 while( get_cabac_bypass( CC ) ) {
5344                     coeff_abs += coeff_abs + get_cabac_bypass( CC );
5350                 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5352                 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5355     } while( coeff_count );
/* Flush the local CABAC state back to the context. */
5356 #ifdef CABAC_ON_STACK
5357             h->cabac.range     = cc.range     ;
5358             h->cabac.low       = cc.low       ;
5359             h->cabac.bytestream= cc.bytestream;
/* DC-only wrapper: lets the compiler specialize the always-inline worker
 * with is_dc == 1 (dead-code-eliminating the AC paths). */
5365 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5366     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC wrapper: specializes the always-inline worker with is_dc == 0. */
5369 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5370     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Front-end dispatcher: categories 0 (luma DC) and 3 (chroma DC) take the
 * DC path, everything else the AC/4x4/8x8 path.
 * NOTE(review): both a direct call with a computed is_dc flag and a
 * dc/nondc-wrapper dispatch appear below; they are presumably alternatives
 * selected by an elided preprocessor conditional — confirm against the
 * full file. */
5374 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5376     decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5378     if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5379     else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the macroblock indices of the top and left neighbours of the
 * current macroblock, adjusting for MBAFF macroblock pairs and field
 * pictures.  Results are stored in h->top_mb_xy / h->left_mb_xy[0]. */
5383 static inline void compute_mb_neighbors(H264Context *h)
5385     MpegEncContext * const s = &h->s;
5386     const int mb_xy = h->mb_xy;
/* Default (progressive frame) neighbours. */
5387     h->top_mb_xy     = mb_xy - s->mb_stride;
5388     h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours depend on the field/frame coding of the current pair
 * and of the adjacent pairs. */
5390         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5391         const int top_pair_xy      = pair_xy     - s->mb_stride;
5392         const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5393         const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5394         const int curr_mb_field_flag = MB_FIELD;
5395         const int bottom = (s->mb_y & 1);
5397         if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5398             h->top_mb_xy -= s->mb_stride;
/* Left neighbour comes from the top MB of the left pair when the coding
 * mode (field/frame) differs between current and left pair. */
5400         if (!left_mb_field_flag == curr_mb_field_flag) {
5401             h->left_mb_xy[0] = pair_xy - 1;
/* Field picture: rows are interleaved, so "above" is two rows up. */
5403     } else if (FIELD_PICTURE) {
5404         h->top_mb_xy -= s->mb_stride;
5410  * decodes a macroblock
5411  * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5413 static int decode_mb_cabac(H264Context *h) {
5414     MpegEncContext * const s = &h->s;
5416     int mb_type, partition_count, cbp = 0;
5417     int dct8x8_allowed= h->pps.transform_8x8_mode;
5419     mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5421     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- mb_skip_flag handling (P/B slices only) ------------------------- */
5422     if( h->slice_type_nos != FF_I_TYPE ) {
5424         /* a skipped mb needs the aff flag from the following mb */
5425         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5426             predict_field_decoding_flag(h);
5427         if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5428             skip = h->next_mb_skipped;
5430             skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5431         /* read skip flags */
/* MBAFF top field MB: peek at the bottom MB's skip flag so the field
 * decoding flag can be read at the right point in the bitstream. */
5433             if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5434                 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5435                 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5436                 if(!h->next_mb_skipped)
5437                     h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB CABAC-related state and return early. */
5442             h->cbp_table[mb_xy] = 0;
5443             h->chroma_pred_mode_table[mb_xy] = 0;
5444             h->last_qscale_diff = 0;
5451         if( (s->mb_y&1) == 0 )
5453                 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5456     h->prev_mb_skipped = 0;
5458     compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type -------------------------------- */
5460     if( h->slice_type_nos == FF_B_TYPE ) {
5461         mb_type = decode_cabac_mb_type_b( h );
5463             partition_count= b_mb_type_info[mb_type].partition_count;
5464             mb_type=         b_mb_type_info[mb_type].type;
5467             goto decode_intra_mb;
5469     } else if( h->slice_type_nos == FF_P_TYPE ) {
5470         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5472             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5473                 /* P_L0_D16x16, P_8x8 */
5474                 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5476                 /* P_L0_D8x16, P_L0_D16x8 */
5477                 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5479             partition_count= p_mb_type_info[mb_type].partition_count;
5480             mb_type=         p_mb_type_info[mb_type].type;
5482             mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5483             goto decode_intra_mb;
5486         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5487         if(h->slice_type == FF_SI_TYPE && mb_type)
5489             assert(h->slice_type_nos == FF_I_TYPE);
/* Intra MB: look up cbp / prediction mode from the intra mb_type table. */
5491         partition_count = 0;
5492         cbp= i_mb_type_info[mb_type].cbp;
5493         h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5494         mb_type= i_mb_type_info[mb_type].type;
5497         mb_type |= MB_TYPE_INTERLACED;
5499     h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow, bypassing CABAC ---------------------- */
5501     if(IS_INTRA_PCM(mb_type)) {
5504         // We assume these blocks are very rare so we do not optimize it.
5505         // FIXME The two following lines get the bitstream position in the cabac
5506         // decode, I think it should be done by a function in cabac.h (or cabac.c).
5507         ptr= h->cabac.bytestream;
5508         if(h->cabac.low&0x1) ptr--;
5510             if(h->cabac.low&0x1FF) ptr--;
5513         // The pixels are stored in the same order as levels in h->mb array.
5514         memcpy(h->mb, ptr, 256); ptr+=256;
5516             memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Restart the CABAC engine after the raw payload. */
5519         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5521         // All blocks are present
5522         h->cbp_table[mb_xy] = 0x1ef;
5523         h->chroma_pred_mode_table[mb_xy] = 0;
5524         // In deblocking, the quantizer is 0
5525         s->current_picture.qscale_table[mb_xy]= 0;
5526         // All coeffs are present
5527         memset(h->non_zero_count[mb_xy], 16, 16);
5528         s->current_picture.mb_type[mb_xy]= mb_type;
5529         h->last_qscale_diff = 0;
/* MBAFF pairs address references per field: double the ref counts while
 * decoding this MB (undone at the end of the function). */
5534         h->ref_count[0] <<= 1;
5535         h->ref_count[1] <<= 1;
5538     fill_caches(h, mb_type, 0);
/* --- intra prediction modes ------------------------------------------ */
5540     if( IS_INTRA( mb_type ) ) {
5542         if( IS_INTRA4x4( mb_type ) ) {
5543             if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5544                 mb_type |= MB_TYPE_8x8DCT;
5545                 for( i = 0; i < 16; i+=4 ) {
5546                     int pred = pred_intra_mode( h, i );
5547                     int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5548                     fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5551                 for( i = 0; i < 16; i++ ) {
5552                     int pred = pred_intra_mode( h, i );
5553                     h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5555                 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5558             write_back_intra_pred_mode(h);
5559             if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5561             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5562             if( h->intra16x16_pred_mode < 0 ) return -1;
5565         h->chroma_pred_mode_table[mb_xy] =
5566         pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
5568         pred_mode= check_intra_pred_mode( h, pred_mode );
5569         if( pred_mode < 0 ) return -1;
5570         h->chroma_pred_mode= pred_mode;
/* --- inter: 8x8 sub-partitioned MB ----------------------------------- */
5572     } else if( partition_count == 4 ) {
5573         int i, j, sub_partition_count[4], list, ref[2][4];
5575         if( h->slice_type_nos == FF_B_TYPE ) {
5576             for( i = 0; i < 4; i++ ) {
5577                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5578                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5579                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5581             if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5582                           h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5583                 pred_direct_motion(h, &mb_type);
5584                 h->ref_cache[0][scan8[4]] =
5585                 h->ref_cache[1][scan8[4]] =
5586                 h->ref_cache[0][scan8[12]] =
5587                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5588                 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5589                     for( i = 0; i < 4; i++ )
5590                         if( IS_DIRECT(h->sub_mb_type[i]) )
5591                             fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5595             for( i = 0; i < 4; i++ ) {
5596                 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5597                 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5598                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices for each 8x8 sub-block (validated against ref_count). */
5602         for( list = 0; list < h->list_count; list++ ) {
5603                 for( i = 0; i < 4; i++ ) {
5604                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
5605                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
5606                         if( h->ref_count[list] > 1 ){
5607                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5608                             if(ref[list][i] >= (unsigned)h->ref_count[list]){
5609                                 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5617                     h->ref_cache[list][ scan8[4*i]+1 ]=
5618                     h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5623         dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors (and mvd cache entries) per sub-partition. */
5625         for(list=0; list<h->list_count; list++){
5627                 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5628                 if(IS_DIRECT(h->sub_mb_type[i])){
5629                     fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5633                 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5634                     const int sub_mb_type= h->sub_mb_type[i];
5635                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5636                     for(j=0; j<sub_partition_count[i]; j++){
5639                         const int index= 4*i + block_width*j;
5640                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5641                         int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5642                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5644                         mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5645                         my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5646                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate the decoded mv/mvd over the cache cells the partition covers. */
5648                         if(IS_SUB_8X8(sub_mb_type)){
5650                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5652                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5655                             mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5657                             mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5658                         }else if(IS_SUB_8X4(sub_mb_type)){
5659                             mv_cache[ 1 ][0]= mx;
5660                             mv_cache[ 1 ][1]= my;
5662                             mvd_cache[ 1 ][0]= mx - mpx;
5663                             mvd_cache[ 1 ][1]= my - mpy;
5664                         }else if(IS_SUB_4X8(sub_mb_type)){
5665                             mv_cache[ 8 ][0]= mx;
5666                             mv_cache[ 8 ][1]= my;
5668                             mvd_cache[ 8 ][0]= mx - mpx;
5669                             mvd_cache[ 8 ][1]= my - mpy;
5671                         mv_cache[ 0 ][0]= mx;
5672                         mv_cache[ 0 ][1]= my;
5674                         mvd_cache[ 0 ][0]= mx - mpx;
5675                         mvd_cache[ 0 ][1]= my - mpy;
/* Unused-direction sub-block: clear mv and mvd caches. */
5678                     uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5679                     uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5680                     p[0] = p[1] = p[8] = p[9] = 0;
5681                     pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- inter: direct 16x16 --------------------------------------------- */
5685     } else if( IS_DIRECT(mb_type) ) {
5686         pred_direct_motion(h, &mb_type);
5687         fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5688         fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5689         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter: 16x16 / 16x8 / 8x16 partitions --------------------------- */
5691         int list, mx, my, i, mpx, mpy;
5692         if(IS_16X16(mb_type)){
5693             for(list=0; list<h->list_count; list++){
5694                 if(IS_DIR(mb_type, 0, list)){
5696                     if(h->ref_count[list] > 1){
5697                         ref= decode_cabac_mb_ref(h, list, 0);
5698                         if(ref >= (unsigned)h->ref_count[list]){
5699                             av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5704                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5706                     fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5708             for(list=0; list<h->list_count; list++){
5709                 if(IS_DIR(mb_type, 0, list)){
5710                     pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5712                     mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5713                     my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5714                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5716                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5717                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5719                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5722         else if(IS_16X8(mb_type)){
5723             for(list=0; list<h->list_count; list++){
5725                         if(IS_DIR(mb_type, i, list)){
5727                             if(h->ref_count[list] > 1){
5728                                 ref= decode_cabac_mb_ref( h, list, 8*i );
5729                                 if(ref >= (unsigned)h->ref_count[list]){
5730                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5735                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5737                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5740             for(list=0; list<h->list_count; list++){
5742                     if(IS_DIR(mb_type, i, list)){
5743                         pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5744                         mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5745                         my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5746                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5748                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5749                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5751                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5752                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5757             assert(IS_8X16(mb_type));
5758             for(list=0; list<h->list_count; list++){
5760                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5762                             if(h->ref_count[list] > 1){
5763                                 ref= decode_cabac_mb_ref( h, list, 4*i );
5764                                 if(ref >= (unsigned)h->ref_count[list]){
5765                                     av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5770                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5772                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5775             for(list=0; list<h->list_count; list++){
5777                     if(IS_DIR(mb_type, i, list)){
5778                         pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5779                         mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5780                         my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5782                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5783                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5784                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5786                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5787                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5794    if( IS_INTER( mb_type ) ) {
5795         h->chroma_pred_mode_table[mb_xy] = 0;
5796         write_back_motion( h, mb_type );
/* --- coded block pattern --------------------------------------------- */
5799     if( !IS_INTRA16x16( mb_type ) ) {
5800         cbp  = decode_cabac_mb_cbp_luma( h );
5802             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5805     h->cbp_table[mb_xy] = h->cbp = cbp;
5807     if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5808         if( decode_cabac_mb_transform_size( h ) )
5809             mb_type |= MB_TYPE_8x8DCT;
5811     s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals ------------------------------------------------------- */
5813     if( cbp || IS_INTRA16x16( mb_type ) ) {
5814         const uint8_t *scan, *scan8x8, *dc_scan;
5815         const uint32_t *qmul;
/* Pick zigzag vs. field scan tables (qscale==0 uses special variants). */
5818         if(IS_INTERLACED(mb_type)){
5819             scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5820             scan= s->qscale ? h->field_scan : h->field_scan_q0;
5821             dc_scan= luma_dc_field_scan;
5823             scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5824             scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5825             dc_scan= luma_dc_zigzag_scan;
/* mb_qp_delta: update qscale (with wrap into the 0..51 range) and the
 * derived chroma QPs. */
5828         h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5829         if( dqp == INT_MIN ){
5830             av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5834         if(((unsigned)s->qscale) > 51){
5835             if(s->qscale<0) s->qscale+= 52;
5836             else            s->qscale-= 52;
5838         h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5839         h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5841         if( IS_INTRA16x16( mb_type ) ) {
5843             //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5844             decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5847                 qmul = h->dequant4_coeff[0][s->qscale];
5848                 for( i = 0; i < 16; i++ ) {
5849                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5850                     decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5853                 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* Non-intra16x16 luma: 4 8x8 groups, each either one 8x8 transform block
 * or four 4x4 blocks. */
5857             for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5858                 if( cbp & (1<<i8x8) ) {
5859                     if( IS_8x8DCT(mb_type) ) {
5860                         decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5861                             scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5863                         qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5864                         for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5865                             const int index = 4*i8x8 + i4x4;
5866                             //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5868                             decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5869 //STOP_TIMER("decode_residual")
/* 8x8 group not coded: clear its non_zero_count cache entries. */
5873                     uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5874                     nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma: DC blocks, then (if signalled) AC blocks for both planes. */
5881             for( c = 0; c < 2; c++ ) {
5882                 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5883                 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5889             for( c = 0; c < 2; c++ ) {
5890                 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5891                 for( i = 0; i < 4; i++ ) {
5892                     const int index = 16 + 4 * c + i;
5893                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5894                     decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
/* No chroma AC: clear the chroma non_zero_count cache. */
5898             uint8_t * const nnz= &h->non_zero_count_cache[0];
5899             nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5900             nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp == 0 and not intra16x16: nothing coded at all. */
5903         uint8_t * const nnz= &h->non_zero_count_cache[0];
5904         fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5905         nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5906         nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5907         h->last_qscale_diff = 0;
5910     s->current_picture.qscale_table[mb_xy]= s->qscale;
5911     write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling performed above. */
5914         h->ref_count[0] >>= 1;
5915         h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (4 pels wide per bS entry).  The
 * alpha/beta/tc0 tables are biased by +52 so that qp + slice offset
 * (which may be negative) indexes them directly.  bS < 4 uses the normal
 * filter with per-line tc; bS == 4 (intra edge) uses the strong filter. */
5922 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5923     const int index_a = qp + h->slice_alpha_c0_offset;
5924     const int alpha = (alpha_table+52)[index_a];
5925     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5929         tc[0] = (tc0_table+52)[index_a][bS[0]];
5930         tc[1] = (tc0_table+52)[index_a][bS[1]];
5931         tc[2] = (tc0_table+52)[index_a][bS[2]];
5932         tc[3] = (tc0_table+52)[index_a][bS[3]];
5933         h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5935         h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge.  Same table lookup scheme as the luma
 * variant; the chroma DSP routines expect tc0+1, hence the +1 on each tc. */
5938 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5939     const int index_a = qp + h->slice_alpha_c0_offset;
5940     const int alpha = (alpha_table+52)[index_a];
5941     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
5945         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5946         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5947         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5948         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5949         h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5951         h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblocks the vertical left edge of an MBAFF macroblock pair, luma.
 * Filtered in C per pixel row because bS and qp may differ line-by-line
 * (8 bS entries, 2 qp values) in a way the SIMD routines can't handle. */
5955 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5957     for( i = 0; i < 16; i++, pix += stride) {
/* Map the pixel row to its bS entry (layout differs for field MBs). */
5963         int bS_index = (i >> 1);
5966             bS_index |= (i & 1);
5969         if( bS[bS_index] == 0 ) {
/* Per-row qp selection: field MBs split top/bottom, frame MBs alternate. */
5973         qp_index = MB_FIELD ? (i >> 3) : (i&1);
5974         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
5975         alpha = (alpha_table+52)[index_a];
5976         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): clip the delta to +-tc. */
5978         if( bS[bS_index] < 4 ) {
5979             const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5980             const int p0 = pix[-1];
5981             const int p1 = pix[-2];
5982             const int p2 = pix[-3];
5983             const int q0 = pix[0];
5984             const int q1 = pix[1];
5985             const int q2 = pix[2];
5987             if( FFABS( p0 - q0 ) < alpha &&
5988                 FFABS( p1 - p0 ) < beta &&
5989                 FFABS( q1 - q0 ) < beta ) {
5993                 if( FFABS( p2 - p0 ) < beta ) {
5994                     pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5997                 if( FFABS( q2 - q0 ) < beta ) {
5998                     pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6002                 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6003                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6004                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6005                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4, intra boundaries). */
6008             const int p0 = pix[-1];
6009             const int p1 = pix[-2];
6010             const int p2 = pix[-3];
6012             const int q0 = pix[0];
6013             const int q1 = pix[1];
6014             const int q2 = pix[2];
6016             if( FFABS( p0 - q0 ) < alpha &&
6017                 FFABS( p1 - p0 ) < beta &&
6018                 FFABS( q1 - q0 ) < beta ) {
6020                 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6021                     if( FFABS( p2 - p0 ) < beta)
6023                         const int p3 = pix[-4];
6025                         pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6026                         pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6027                         pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6030                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6032                     if( FFABS( q2 - q0 ) < beta)
6034                         const int q3 = pix[3];
6036                         pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6037                         pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6038                         pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6041                         pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6045                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6046                     pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6048                 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Chroma counterpart of filter_mb_mbaff_edgev: left edge of an MBAFF pair,
 * 8 chroma rows, per-row bS/qp selection, C implementation. */
6053 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6055     for( i = 0; i < 8; i++, pix += stride) {
6063         if( bS[bS_index] == 0 ) {
6067         qp_index = MB_FIELD ? (i >> 2) : (i&1);
6068         index_a= qp[qp_index] + h->slice_alpha_c0_offset;
6069         alpha = (alpha_table+52)[index_a];
6070         beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal chroma filter: only p0/q0 are modified; tc is tc0+1 for chroma. */
6072         if( bS[bS_index] < 4 ) {
6073             const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6074             const int p0 = pix[-1];
6075             const int p1 = pix[-2];
6076             const int q0 = pix[0];
6077             const int q1 = pix[1];
6079             if( FFABS( p0 - q0 ) < alpha &&
6080                 FFABS( p1 - p0 ) < beta &&
6081                 FFABS( q1 - q0 ) < beta ) {
6082                 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6084                 pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
6085                 pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
6086                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong chroma filter (bS == 4). */
6089             const int p0 = pix[-1];
6090             const int p1 = pix[-2];
6091             const int q0 = pix[0];
6092             const int q1 = pix[1];
6094             if( FFABS( p0 - q0 ) < alpha &&
6095                 FFABS( p1 - p0 ) < beta &&
6096                 FFABS( q1 - q0 ) < beta ) {
6098                 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
6099                 pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
6100                 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; mirror of filter_mb_edgev using the
 * vertical-direction DSP routines. */
6106 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6107     const int index_a = qp + h->slice_alpha_c0_offset;
6108     const int alpha = (alpha_table+52)[index_a];
6109     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6113         tc[0] = (tc0_table+52)[index_a][bS[0]];
6114         tc[1] = (tc0_table+52)[index_a][bS[1]];
6115         tc[2] = (tc0_table+52)[index_a][bS[2]];
6116         tc[3] = (tc0_table+52)[index_a][bS[3]];
6117         h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6119         h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; mirror of filter_mb_edgecv
 * (chroma DSP routines take tc0+1). */
6123 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6124     const int index_a = qp + h->slice_alpha_c0_offset;
6125     const int alpha = (alpha_table+52)[index_a];
6126     const int beta  = (beta_table+52)[qp + h->slice_beta_offset];
6130         tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6131         tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6132         tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6133         tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6134         h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6136         h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: computes boundary strengths with
 * the DSP helper instead of the full per-edge derivation.  Falls back to
 * the exact filter_mb() whenever a condition it cannot handle applies. */
6140 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6141     MpegEncContext * const s = &h->s;
6142     int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6144     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Conditions the fast path cannot handle: picture border, missing DSP
 * helper, per-plane chroma qp, or cross-slice deblocking mode 2. */
6148     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6149        !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6150        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6151                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6152         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6155     assert(!FRAME_MBAFF);
/* Average the qp with each neighbour for the MB-boundary edges (spec:
 * boundary qp is the mean of the two MBs' qp). */
6157     mb_type = s->current_picture.mb_type[mb_xy];
6158     qp = s->current_picture.qscale_table[mb_xy];
6159     qp0 = s->current_picture.qscale_table[mb_xy-1];
6160     qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6161     qpc = get_chroma_qp( h, 0, qp );
6162     qpc0 = get_chroma_qp( h, 0, qp0 );
6163     qpc1 = get_chroma_qp( h, 0, qp1 );
6164     qp0 = (qp + qp0 + 1) >> 1;
6165     qp1 = (qp + qp1 + 1) >> 1;
6166     qpc0 = (qpc + qpc0 + 1) >> 1;
6167     qpc1 = (qpc + qpc1 + 1) >> 1;
/* If all qp values are below the threshold, alpha==0 everywhere and the
 * filter is a no-op: skip the MB entirely. */
6168     qp_thresh = 15 - h->slice_alpha_c0_offset;
6169     if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6170        qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed bS (4 on MB edges, 3 inside; 3 on top edge in fields). */
6173     if( IS_INTRA(mb_type) ) {
6174         int16_t bS4[4] = {4,4,4,4};
6175         int16_t bS3[4] = {3,3,3,3};
6176         int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only the 0th and 2nd internal edges exist. */
6177         if( IS_8x8DCT(mb_type) ) {
6178             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6179             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6180             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6181             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6183             filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6184             filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6185             filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6186             filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6187             filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6188             filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6189             filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6190             filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6192         filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6193         filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6194         filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6195         filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6196         filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6197         filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6198         filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6199         filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS from nnz/ref/mv with the DSP helper, then patch
 * the MB-boundary edges for intra neighbours. */
6202     DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6203     uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* Fully-coded 8x8 MB: every internal/external edge has bS 2 — skip the
 * generic strength computation. */
6205     if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6207         bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge0/1: partitions large enough that interior edges can be
 * skipped by the strength routine; edges: number of edges per direction. */
6209         int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6210                          (mb_type & MB_TYPE_16x8) ? 1 : 0;
6211         int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6212                          && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6214         int step =  IS_8x8DCT(mb_type) ? 2 : 1;
6215         edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6216         s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6217                                           (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbours force bS 4 (3 for the top edge in field pictures). */
6219     if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6220         bSv[0][0] = 0x0004000400040004ULL;
6221     if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6222         bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* Apply the chosen strengths: dir 0 = vertical edges, dir 1 = horizontal;
 * edge 0 uses the averaged boundary qp, interior edges the MB qp. */
6224 #define FILTER(hv,dir,edge)\
6225         if(bSv[dir][edge]) {\
6226             filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6228                 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6229                 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6235     } else if( IS_8x8DCT(mb_type) ) {
6255 /* Apply the in-loop deblocking filter to one macroblock in one direction
 * (dir==0: vertical edges, filtered left-to-right; dir==1: horizontal edges).
 * Computes per-edge boundary strengths bS[0..3] from intra/interlace status,
 * non-zero coefficient counts and motion-vector/reference differences, then
 * calls the edge-filter helpers on luma and both chroma planes.
 * NOTE(review): this excerpt is a sampled copy with interior lines missing;
 * comments below describe only what the visible lines establish. */
6255 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6256 MpegEncContext * const s = &h->s;
6258 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6259 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6260 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6261 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6262 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6264 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6265 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6266 // how often to recheck mv-based bS when iterating between edges
6267 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6268 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6269 // how often to recheck mv-based bS when iterating along each edge
6270 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6272 if (first_vertical_edge_done) {
6276 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6279 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6280 && !IS_INTERLACED(mb_type)
6281 && IS_INTERLACED(mbm_type)
6283 // This is a special case in the norm where the filtering must
6284 // be done twice (one each of the field) even if we are in a
6285 // frame macroblock.
6287 static const int nnz_idx[4] = {4,5,6,3};
6288 unsigned int tmp_linesize = 2 * linesize;
6289 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6290 int mbn_xy = mb_xy - 2 * s->mb_stride;
6295 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6296 if( IS_INTRA(mb_type) ||
6297 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6298 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6300 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6301 for( i = 0; i < 4; i++ ) {
6302 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6303 mbn_nnz[nnz_idx[i]] != 0 )
6309 // Do not use s->qscale as luma quantizer because it has not the same
6310 // value in IPCM macroblocks.
6311 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6312 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6313 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6314 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6315 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6316 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6317 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6318 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
 /* Main loop: up to 4 internal/external edges; edge 0 borders the
  * neighbouring macroblock mbm_xy, edges 1..3 are internal. */
6325 for( edge = start; edge < edges; edge++ ) {
6326 /* mbn_xy: neighbor macroblock */
6327 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6328 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6329 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
 /* 8x8 transform: odd edges coincide with transform-block interiors
  * and are skipped. */
6333 if( (edge&1) && IS_8x8DCT(mb_type) )
6336 if( IS_INTRA(mb_type) ||
6337 IS_INTRA(mbn_type) ) {
6340 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6341 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6350 bS[0] = bS[1] = bS[2] = bS[3] = value;
6355 if( edge & mask_edge ) {
6356 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6359 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6360 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6363 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6364 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6365 int bn_idx= b_idx - (dir ? 8:1);
 /* bS depends on whether references differ or any MV component
  * differs by >= 4 (quarter-pel units; mvy_limit for vertical). */
6368 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6369 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6370 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6371 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6374 if(h->slice_type_nos == FF_B_TYPE && v){
6376 for( l = 0; !v && l < 2; l++ ) {
6378 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6379 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6380 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6384 bS[0] = bS[1] = bS[2] = bS[3] = v;
 /* General case: evaluate bS per 4x4 sub-block along the edge. */
6390 for( i = 0; i < 4; i++ ) {
6391 int x = dir == 0 ? edge : i;
6392 int y = dir == 0 ? i : edge;
6393 int b_idx= 8 + 4 + x + 8*y;
6394 int bn_idx= b_idx - (dir ? 8:1);
6396 if( h->non_zero_count_cache[b_idx] |
6397 h->non_zero_count_cache[bn_idx] ) {
6403 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6404 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6405 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6406 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6412 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6414 for( l = 0; l < 2; l++ ) {
6416 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6417 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6418 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
 /* All four strengths zero -> nothing to filter on this edge. */
6427 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6432 // Do not use s->qscale as luma quantizer because it has not the same
6433 // value in IPCM macroblocks.
6434 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6435 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6436 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6437 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
 /* dir==0 -> vertical edge filters; chroma filtered on even edges only
  * (4:2:0 chroma has half the edges of luma). */
6439 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6440 if( (edge&1) == 0 ) {
6441 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6442 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6443 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6444 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6447 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6448 if( (edge&1) == 0 ) {
6449 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6450 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6451 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6452 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6458 /* Deblock one macroblock: handle the low-QP early-skip, the CAVLC-8x8DCT
 * non-zero-count fixup, the MBAFF special first vertical edge, then defer
 * to filter_mb_dir() for the regular vertical and horizontal passes.
 * NOTE(review): sampled excerpt — interior lines are missing; comments
 * describe only the visible code. */
6458 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6459 MpegEncContext * const s = &h->s;
6460 const int mb_xy= mb_x + mb_y*s->mb_stride;
6461 const int mb_type = s->current_picture.mb_type[mb_xy];
6462 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6463 int first_vertical_edge_done = 0;
6466 //for sufficiently low qp, filtering wouldn't do anything
6467 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6469 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6470 int qp = s->current_picture.qscale_table[mb_xy];
6472 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6473 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6478 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6479 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6480 int top_type, left_type[2];
6481 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6482 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6483 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
 /* Rebuild non_zero_count_cache entries from the cbp bits for 8x8DCT
  * neighbours (top, left pair) and the current macroblock. */
6485 if(IS_8x8DCT(top_type)){
6486 h->non_zero_count_cache[4+8*0]=
6487 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6488 h->non_zero_count_cache[6+8*0]=
6489 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6491 if(IS_8x8DCT(left_type[0])){
6492 h->non_zero_count_cache[3+8*1]=
6493 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6495 if(IS_8x8DCT(left_type[1])){
6496 h->non_zero_count_cache[3+8*3]=
6497 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6500 if(IS_8x8DCT(mb_type)){
6501 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6502 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6504 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6505 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6507 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6508 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6510 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6511 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
 /* MBAFF special case: current and left MB pair have different
  * interlacing, so the first vertical edge needs 8 strengths/2 QPs. */
6516 // left mb is in picture
6517 && h->slice_table[mb_xy-1] != 0xFFFF
6518 // and current and left pair do not have the same interlaced type
6519 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6520 // and left mb is in the same slice if deblocking_filter == 2
6521 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6522 /* First vertical edge is different in MBAFF frames
6523 * There are 8 different bS to compute and 2 different Qp
6525 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6526 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6531 int mb_qp, mbn0_qp, mbn1_qp;
6533 first_vertical_edge_done = 1;
6535 if( IS_INTRA(mb_type) )
6536 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6538 for( i = 0; i < 8; i++ ) {
6539 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6541 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6543 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6544 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6545 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6547 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
 /* Average current/neighbour QPs for luma (qp) and both chroma
  * planes (bqp for Cb, rqp for Cr), one per left neighbour. */
6554 mb_qp = s->current_picture.qscale_table[mb_xy];
6555 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6556 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6557 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6558 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6559 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6560 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6561 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6562 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6563 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6564 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6565 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6566 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6569 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6570 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6571 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6572 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6573 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
 /* Regular pass: dir 0 = vertical edges, dir 1 = horizontal edges.
  * first_vertical_edge_done only applies to the dir==0 call. */
6577 for( dir = 0; dir < 2; dir++ )
6578 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6580 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6581 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6585 /* Decode all macroblocks of one slice (thread entry point; arg is a
 * pointer to the per-thread H264Context). Three paths are visible here:
 * CABAC, CAVLC, and a generic decode_mb() fallback loop. Each path decodes
 * macroblocks until end-of-slice, reporting decoded/errored regions to the
 * error concealment via ff_er_add_slice().
 * FIX(review): line 6747 contained garbled tokens `s->?gb` / `s->gb?.` —
 * repaired to match the identical test on lines 6735/6748.
 * NOTE(review): sampled excerpt — interior lines are missing. */
6585 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6586 H264Context *h = *(void**)arg;
6587 MpegEncContext * const s = &h->s;
6588 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6592 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6593 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (CONFIG_H264_ENCODER && s->encoding);
6595 if( h->pps.cabac ) {
 /* --- CABAC path: byte-align, then init the arithmetic decoder over
  * the remainder of the bitstream. --- */
6599 align_get_bits( &s->gb );
6602 ff_init_cabac_states( &h->cabac);
6603 ff_init_cabac_decoder( &h->cabac,
6604 s->gb.buffer + get_bits_count(&s->gb)/8,
6605 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6606 /* calculate pre-state: derive the 460 context states from qscale and
 * the slice-type/cabac_init_idc initialization tables. */
6607 for( i= 0; i < 460; i++ ) {
6609 if( h->slice_type_nos == FF_I_TYPE )
6610 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6612 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6615 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6617 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6622 int ret = decode_mb_cabac(h);
6624 //STOP_TIMER("decode_mb_cabac")
6626 if(ret>=0) hl_decode_mb(h);
 /* In MBAFF the bottom MB of a pair is decoded immediately after
  * the top one. */
6628 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6631 ret = decode_mb_cabac(h);
6633 if(ret>=0) hl_decode_mb(h);
6636 eos = get_cabac_terminate( &h->cabac );
 /* Decode error or bytestream overread (2 bytes slack allowed). */
6638 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6639 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6640 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6644 if( ++s->mb_x >= s->mb_width ) {
6646 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6648 if(FIELD_OR_MBAFF_PICTURE) {
6653 if( eos || s->mb_y >= s->mb_height ) {
6654 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6655 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 /* --- CAVLC path --- */
6662 int ret = decode_mb_cavlc(h);
6664 if(ret>=0) hl_decode_mb(h);
6666 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6668 ret = decode_mb_cavlc(h);
6670 if(ret>=0) hl_decode_mb(h);
6675 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6676 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6681 if(++s->mb_x >= s->mb_width){
6683 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6685 if(FIELD_OR_MBAFF_PICTURE) {
6688 if(s->mb_y >= s->mb_height){
6689 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
 /* Exactly at the end of the bitstream -> clean slice end;
  * otherwise report the region as errored. */
6691 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6692 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6696 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6703 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6704 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6705 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6710 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
 /* --- generic fallback loop over the remaining macroblock rows --- */
6719 for(;s->mb_y < s->mb_height; s->mb_y++){
6720 for(;s->mb_x < s->mb_width; s->mb_x++){
6721 int ret= decode_mb(h);
6726 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6727 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6732 if(++s->mb_x >= s->mb_width){
6734 if(++s->mb_y >= s->mb_height){
6735 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6736 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6740 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6747 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6748 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6749 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6753 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6760 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6763 return -1; //not reached
6766 /* Parse a picture-timing SEI message: optional CPB/DPB delays, the
 * pic_struct field, and up to num_clock_ts clock timestamps. All fields
 * except sei_pic_struct are skipped (parsed but not stored). */
6766 static int decode_picture_timing(H264Context *h){
6767 MpegEncContext * const s = &h->s;
6768 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6769 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6770 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6772 if(h->sps.pic_struct_present_flag){
6773 unsigned int i, num_clock_ts;
6774 h->sei_pic_struct = get_bits(&s->gb, 4);
 /* Values above FRAME_TRIPLING are reserved -> reject. */
6776 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6779 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6781 for (i = 0 ; i < num_clock_ts ; i++){
6782 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6783 unsigned int full_timestamp_flag;
6784 skip_bits(&s->gb, 2); /* ct_type */
6785 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6786 skip_bits(&s->gb, 5); /* counting_type */
6787 full_timestamp_flag = get_bits(&s->gb, 1);
6788 skip_bits(&s->gb, 1); /* discontinuity_flag */
6789 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6790 skip_bits(&s->gb, 8); /* n_frames */
6791 if(full_timestamp_flag){
6792 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6793 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6794 skip_bits(&s->gb, 5); /* hours_value 0..23 */
 /* Partial timestamp: each component is conditionally present. */
6796 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6797 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6798 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6799 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6800 if(get_bits(&s->gb, 1)) /* hours_flag */
6801 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6805 if(h->sps.time_offset_length > 0)
6806 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6813 /* Parse an "unregistered user data" SEI payload of `size` bytes. Reads up
 * to sizeof(user_data)-1 bytes, scans the text after the 16-byte UUID for
 * an x264 version string to set h->x264_build (used for bug workarounds),
 * then skips any remaining payload bytes. */
6813 static int decode_unregistered_user_data(H264Context *h, int size){
6814 MpegEncContext * const s = &h->s;
6815 uint8_t user_data[16+256];
6821 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6822 user_data[i]= get_bits(&s->gb, 8);
 /* The payload after the UUID is expected to start with the x264
  * identification string; only the core build number is extracted. */
6826 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6827 if(e==1 && build>=0)
6828 h->x264_build= build;
6830 if(s->avctx->debug & FF_DEBUG_BUGS)
6831 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
 /* Skip whatever part of the payload did not fit in the local buffer. */
6834 skip_bits(&s->gb, 8);
6839 /* Iterate over all SEI messages in the current NAL unit. Each message has
 * a type and a size coded as sequences of 0xFF-prefixed bytes; recognized
 * types are dispatched, everything else is skipped. */
6839 static int decode_sei(H264Context *h){
6840 MpegEncContext * const s = &h->s;
6842 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
 /* payloadType: accumulate while bytes equal 255. */
6847 type+= show_bits(&s->gb, 8);
6848 }while(get_bits(&s->gb, 8) == 255);
 /* payloadSize: same 255-escape coding. */
6852 size+= show_bits(&s->gb, 8);
6853 }while(get_bits(&s->gb, 8) == 255);
6856 case 1: // Picture timing SEI
6857 if(decode_picture_timing(h) < 0)
6861 if(decode_unregistered_user_data(h, size) < 0)
6865 skip_bits(&s->gb, 8*size);
6868 //FIXME check bits here
6869 align_get_bits(&s->gb);
6875 /* Parse HRD (hypothetical reference decoder) parameters into `sps`.
 * Stores the delay-field lengths and time_offset_length; all per-CPB
 * bitrate/size values are read and discarded. Returns <0 on an invalid
 * cpb_count (presumably; the error-return line is not visible here). */
6875 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6876 MpegEncContext * const s = &h->s;
6878 cpb_count = get_ue_golomb_31(&s->gb) + 1;
6880 if(cpb_count > 32U){
6881 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6885 get_bits(&s->gb, 4); /* bit_rate_scale */
6886 get_bits(&s->gb, 4); /* cpb_size_scale */
6887 for(i=0; i<cpb_count; i++){
6888 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6889 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6890 get_bits1(&s->gb); /* cbr_flag */
 /* Field lengths later used by decode_picture_timing(). */
6892 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6893 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6894 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6895 sps->time_offset_length = get_bits(&s->gb, 5);
6899 /* Parse VUI (video usability information) from the SPS: sample aspect
 * ratio, overscan/signal-type/chroma-location info (skipped), timing info,
 * HRD parameters, pic_struct flag and bitstream restrictions. Fields not
 * stored in `sps` are read purely to keep the bitstream position correct. */
6899 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6900 MpegEncContext * const s = &h->s;
6901 int aspect_ratio_info_present_flag;
6902 unsigned int aspect_ratio_idc;
6904 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6906 if( aspect_ratio_info_present_flag ) {
6907 aspect_ratio_idc= get_bits(&s->gb, 8);
6908 if( aspect_ratio_idc == EXTENDED_SAR ) {
6909 sps->sar.num= get_bits(&s->gb, 16);
6910 sps->sar.den= get_bits(&s->gb, 16);
6911 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6912 sps->sar= pixel_aspect[aspect_ratio_idc];
6914 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6921 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6923 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6924 get_bits1(&s->gb); /* overscan_appropriate_flag */
6927 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6928 get_bits(&s->gb, 3); /* video_format */
6929 get_bits1(&s->gb); /* video_full_range_flag */
6930 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6931 get_bits(&s->gb, 8); /* colour_primaries */
6932 get_bits(&s->gb, 8); /* transfer_characteristics */
6933 get_bits(&s->gb, 8); /* matrix_coefficients */
6937 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6938 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6939 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6942 sps->timing_info_present_flag = get_bits1(&s->gb);
6943 if(sps->timing_info_present_flag){
6944 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6945 sps->time_scale = get_bits_long(&s->gb, 32);
6946 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
 /* NAL and VCL HRD parameter sets share the same syntax. */
6949 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6950 if(sps->nal_hrd_parameters_present_flag)
6951 if(decode_hrd_parameters(h, sps) < 0)
6953 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6954 if(sps->vcl_hrd_parameters_present_flag)
6955 if(decode_hrd_parameters(h, sps) < 0)
6957 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6958 get_bits1(&s->gb); /* low_delay_hrd_flag */
6959 sps->pic_struct_present_flag = get_bits1(&s->gb);
6961 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6962 if(sps->bitstream_restriction_flag){
6963 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6964 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6965 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6966 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6967 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6968 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6969 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6971 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6972 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
6980 /* Parse one quantization scaling list of `size` (16 or 64) entries into
 * `factors`. If the list is absent, `fallback_list` is copied; a first
 * delta producing 0 selects the JVT default list. Entries are delta-coded
 * (signed Exp-Golomb, mod 256) in zigzag order; a delta that makes `next`
 * zero holds the previous value for the rest of the list. */
6980 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6981 const uint8_t *jvt_list, const uint8_t *fallback_list){
6982 MpegEncContext * const s = &h->s;
6983 int i, last = 8, next = 8;
6984 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
6985 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6986 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6988 for(i=0;i<size;i++){
6990 next = (last + get_se_golomb(&s->gb)) & 0xff;
6991 if(!i && !next){ /* matrix not written, we use the preset one */
6992 memcpy(factors, jvt_list, size*sizeof(uint8_t));
 /* next==0 repeats the last explicit value (run-out). */
6995 last = factors[scan[i]] = next ? next : last;
6999 /* Parse the full set of scaling matrices for an SPS (is_sps!=0) or PPS.
 * For a PPS, the SPS matrices serve as fallback when present; otherwise
 * the JVT defaults do. 4x4 lists fall back chain-wise on the previous
 * list of the same intra/inter class; 8x8 lists are only present when
 * the 8x8 transform can be used. */
6999 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7000 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7001 MpegEncContext * const s = &h->s;
7002 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7003 const uint8_t *fallback[4] = {
7004 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7005 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7006 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7007 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7009 if(get_bits1(&s->gb)){
7010 sps->scaling_matrix_present |= is_sps;
7011 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7012 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7013 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7014 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7015 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7016 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7017 if(is_sps || pps->transform_8x8_mode){
7018 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7019 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7024 /* Parse a sequence parameter set NAL unit, validate its fields, and store
 * the newly allocated SPS into h->sps_buffers[sps_id] (replacing any
 * previous one). Returns <0 on invalid data (presumably; error-return
 * lines are not visible in this sampled excerpt). */
7024 static inline int decode_seq_parameter_set(H264Context *h){
7025 MpegEncContext * const s = &h->s;
7026 int profile_idc, level_idc;
7027 unsigned int sps_id;
7031 profile_idc= get_bits(&s->gb, 8);
7032 get_bits1(&s->gb); //constraint_set0_flag
7033 get_bits1(&s->gb); //constraint_set1_flag
7034 get_bits1(&s->gb); //constraint_set2_flag
7035 get_bits1(&s->gb); //constraint_set3_flag
7036 get_bits(&s->gb, 4); // reserved
7037 level_idc= get_bits(&s->gb, 8);
7038 sps_id= get_ue_golomb_31(&s->gb);
7040 if(sps_id >= MAX_SPS_COUNT) {
7041 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7044 sps= av_mallocz(sizeof(SPS));
7048 sps->profile_idc= profile_idc;
7049 sps->level_idc= level_idc;
 /* Default: flat scaling matrices (all 16) until parsed otherwise. */
7051 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7052 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7053 sps->scaling_matrix_present = 0;
7055 if(sps->profile_idc >= 100){ //high profile
7056 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7057 if(sps->chroma_format_idc == 3)
7058 get_bits1(&s->gb); //residual_color_transform_flag
7059 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7060 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7061 sps->transform_bypass = get_bits1(&s->gb);
7062 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7064 sps->chroma_format_idc= 1;
7067 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7068 sps->poc_type= get_ue_golomb_31(&s->gb);
7070 if(sps->poc_type == 0){ //FIXME #define
7071 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7072 } else if(sps->poc_type == 1){//FIXME #define
7073 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7074 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7075 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7076 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7078 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7079 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7083 for(i=0; i<sps->poc_cycle_length; i++)
7084 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7085 }else if(sps->poc_type != 2){
7086 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7090 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7091 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7092 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7095 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7096 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7097 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7098 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7099 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7100 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7104 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7105 if(!sps->frame_mbs_only_flag)
7106 sps->mb_aff= get_bits1(&s->gb);
7110 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7112 #ifndef ALLOW_INTERLACE
7114 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7116 sps->crop= get_bits1(&s->gb);
7118 sps->crop_left = get_ue_golomb(&s->gb);
7119 sps->crop_right = get_ue_golomb(&s->gb);
7120 sps->crop_top = get_ue_golomb(&s->gb);
7121 sps->crop_bottom= get_ue_golomb(&s->gb);
7122 if(sps->crop_left || sps->crop_top){
7123 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7125 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7126 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7132 sps->crop_bottom= 0;
7135 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7136 if( sps->vui_parameters_present_flag )
7137 decode_vui_parameters(h, sps);
7139 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7140 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7141 sps_id, sps->profile_idc, sps->level_idc,
7143 sps->ref_frame_count,
7144 sps->mb_width, sps->mb_height,
7145 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7146 sps->direct_8x8_inference_flag ? "8B8" : "",
7147 sps->crop_left, sps->crop_right,
7148 sps->crop_top, sps->crop_bottom,
7149 sps->vui_parameters_present_flag ? "VUI" : "",
7150 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
 /* Replace any previously stored SPS with the same id. */
7153 av_free(h->sps_buffers[sps_id]);
7154 h->sps_buffers[sps_id]= sps;
7162 /* Fill chroma QP lookup table `t` (0=Cb, 1=Cr) of `pps`: for each luma QP
 * 0..51, apply the chroma_qp_index_offset `index` (clamped to 0..51) and
 * map through the standard chroma_qp table. */
7162 build_qp_table(PPS *pps, int t, int index)
7165 for(i = 0; i < 52; i++)
7166 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7169 /* Parse a picture parameter set NAL unit of `bit_length` bits, validate
 * it against the referenced SPS, precompute the chroma QP tables, and
 * store the new PPS into h->pps_buffers[pps_id]. FMO slice-group maps are
 * not supported (only logged). Returns <0 on invalid data (presumably;
 * error-return lines are missing from this sampled excerpt). */
7169 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7170 MpegEncContext * const s = &h->s;
7171 unsigned int pps_id= get_ue_golomb(&s->gb);
7174 if(pps_id >= MAX_PPS_COUNT) {
7175 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7179 pps= av_mallocz(sizeof(PPS));
7182 pps->sps_id= get_ue_golomb_31(&s->gb);
7183 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7184 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7188 pps->cabac= get_bits1(&s->gb);
7189 pps->pic_order_present= get_bits1(&s->gb);
7190 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7191 if(pps->slice_group_count > 1 ){
7192 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7193 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
 /* Unparsed FMO syntax, reproduced from the spec table for reference: */
7194 switch(pps->mb_slice_group_map_type){
7197 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7198 | run_length[ i ] |1 |ue(v) |
7203 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7205 | top_left_mb[ i ] |1 |ue(v) |
7206 | bottom_right_mb[ i ] |1 |ue(v) |
7214 | slice_group_change_direction_flag |1 |u(1) |
7215 | slice_group_change_rate_minus1 |1 |ue(v) |
7220 | slice_group_id_cnt_minus1 |1 |ue(v) |
7221 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7223 | slice_group_id[ i ] |1 |u(v) |
7228 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7229 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7230 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7231 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7235 pps->weighted_pred= get_bits1(&s->gb);
7236 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7237 pps->init_qp= get_se_golomb(&s->gb) + 26;
7238 pps->init_qs= get_se_golomb(&s->gb) + 26;
7239 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7240 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7241 pps->constrained_intra_pred= get_bits1(&s->gb);
7242 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7244 pps->transform_8x8_mode= 0;
7245 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7246 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7247 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
 /* Optional PPS extension (present iff bits remain): 8x8 transform flag,
  * PPS-level scaling matrices and second chroma QP offset. */
7249 if(get_bits_count(&s->gb) < bit_length){
7250 pps->transform_8x8_mode= get_bits1(&s->gb);
7251 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7252 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7254 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7257 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7258 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7259 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7260 h->pps.chroma_qp_diff= 1;
7262 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7263 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7264 pps_id, pps->sps_id,
7265 pps->cabac ? "CABAC" : "CAVLC",
7266 pps->slice_group_count,
7267 pps->ref_count[0], pps->ref_count[1],
7268 pps->weighted_pred ? "weighted" : "",
7269 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7270 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7271 pps->constrained_intra_pred ? "CONSTR" : "",
7272 pps->redundant_pic_cnt_present ? "REDU" : "",
7273 pps->transform_8x8_mode ? "8x8DCT" : ""
 /* Replace any previously stored PPS with the same id. */
7277 av_free(h->pps_buffers[pps_id]);
7278 h->pps_buffers[pps_id]= pps;
7286 * Call decode_slice() for each context.
7288 * @param h h264 master context
7289 * @param context_count number of contexts to execute
/* NOTE(review): this chunk is truncated — interior lines (braces, else
 * branches, local declarations such as `i` and `hx`) are missing relative
 * to a compilable original; confirm control flow against the full file. */
7291 static void execute_decode_slices(H264Context *h, int context_count){
7292 MpegEncContext * const s = &h->s;
7293 AVCodecContext * const avctx= s->avctx;
/* VDPAU hardware-accelerated path: software slice decoding is skipped. */
7297 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
/* Single context: decode directly on the caller's thread. */
7299 if(context_count == 1) {
7300 decode_slice(avctx, &h);
/* Multiple contexts: propagate error settings into each thread context
 * before dispatching them in parallel. */
7302 for(i = 1; i < context_count; i++) {
7303 hx = h->thread_context[i];
7304 hx->s.error_recognition = avctx->error_recognition;
7305 hx->s.error_count = 0;
/* Run decode_slice() over all thread contexts via the execute() callback. */
7308 avctx->execute(avctx, (void *)decode_slice,
7309 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7311 /* pull back stuff from slices to master context */
7312 hx = h->thread_context[context_count - 1];
7313 s->mb_x = hx->s.mb_x;
7314 s->mb_y = hx->s.mb_y;
7315 s->dropable = hx->s.dropable;
7316 s->picture_structure = hx->s.picture_structure;
/* Fold per-thread error counts back into the master context. */
7317 for(i = 1; i < context_count; i++)
7318 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Split the input buffer into NAL units and dispatch each by type
 * (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...), batching slices
 * across thread contexts and flushing them via execute_decode_slices().
 * NOTE(review): chunk is truncated — the main parsing loop header, several
 * local declarations (i, buf_index, nalsize, err, ptr, dst_length,
 * consumed, bit_length) and various braces are missing; read alongside
 * the full file.
 */
7323 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7324 MpegEncContext * const s = &h->s;
7325 AVCodecContext * const avctx= s->avctx;
7327 H264Context *hx; ///< thread context
7328 int context_count = 0;
7330 h->max_contexts = avctx->thread_count;
/* Debug dump of the first input bytes (enclosing condition not visible here). */
7333 for(i=0; i<50; i++){
7334 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CHUNKS mode each call starts a fresh access unit. */
7337 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7338 h->current_slice = 0;
7339 if (!s->first_field)
7340 s->current_picture_ptr= NULL;
7352 if(buf_index >= buf_size) break;
/* AVC (length-prefixed) framing: read the big-endian NAL size. */
7354 for(i = 0; i < h->nal_length_size; i++)
7355 nalsize = (nalsize << 8) | buf[buf_index++];
7356 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7361 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7366 // start code prefix search
7367 for(; buf_index + 3 < buf_size; buf_index++){
7368 // This should always succeed in the first iteration.
7369 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7373 if(buf_index+3 >= buf_size) break;
7378 hx = h->thread_context[context_count];
/* Unescape the RBSP (strip emulation-prevention bytes). */
7380 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7381 if (ptr==NULL || dst_length < 0){
/* Trim trailing zero bytes, then compute the exact payload bit length. */
7384 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7386 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7388 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7389 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7392 if (h->is_avc && (nalsize != consumed)){
7393 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7397 buf_index += consumed;
/* Skip non-reference NALs when frame skipping is requested. */
7399 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7400 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7405 switch(hx->nal_unit_type){
/* IDR slice handling: reject IDR/non-IDR mixes, reset reference state. */
7407 if (h->nal_unit_type != NAL_IDR_SLICE) {
7408 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7411 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7413 init_get_bits(&hx->s.gb, ptr, bit_length);
7415 hx->inter_gb_ptr= &hx->s.gb;
7416 hx->s.data_partitioning = 0;
7418 if((err = decode_slice_header(hx, h)))
7421 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Decide whether this slice is actually decoded, honoring skip_frame. */
7422 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7423 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7424 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7425 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7426 && avctx->skip_frame < AVDISCARD_ALL){
/* VDPAU path: forward the raw NAL (re-prefixed with a start code). */
7427 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7428 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7429 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7430 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* Data partition A: carries the slice header; enables partitioned mode. */
7436 init_get_bits(&hx->s.gb, ptr, bit_length);
7438 hx->inter_gb_ptr= NULL;
7439 hx->s.data_partitioning = 1;
7441 err = decode_slice_header(hx, h);
/* Data partition B: intra coefficients. */
7444 init_get_bits(&hx->intra_gb, ptr, bit_length);
7445 hx->intra_gb_ptr= &hx->intra_gb;
/* Data partition C: inter coefficients; slice becomes decodable once
 * A (and the context) are in place and skip settings allow it. */
7448 init_get_bits(&hx->inter_gb, ptr, bit_length);
7449 hx->inter_gb_ptr= &hx->inter_gb;
7451 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7452 && s->context_initialized
7454 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7455 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7456 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7457 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI NAL (handler call not visible in this truncated chunk). */
7461 init_get_bits(&s->gb, ptr, bit_length);
/* SPS NAL: parse, then derive delay/reordering settings. */
7465 init_get_bits(&s->gb, ptr, bit_length);
7466 decode_seq_parameter_set(h);
7468 if(s->flags& CODEC_FLAG_LOW_DELAY)
7471 if(avctx->has_b_frames < 2)
7472 avctx->has_b_frames= !s->low_delay;
/* PPS NAL. */
7475 init_get_bits(&s->gb, ptr, bit_length);
7477 decode_picture_parameter_set(h, bit_length);
/* NAL types that are intentionally ignored. */
7481 case NAL_END_SEQUENCE:
7482 case NAL_END_STREAM:
7483 case NAL_FILLER_DATA:
7485 case NAL_AUXILIARY_SLICE:
7488 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush a full batch of queued slice contexts. */
7491 if(context_count == h->max_contexts) {
7492 execute_decode_slices(h, context_count);
7497 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7499 /* Slice could not be decoded in parallel mode, copy down
7500 * NAL unit stuff to context 0 and restart. Note that
7501 * rbsp_buffer is not transferred, but since we no longer
7502 * run in parallel mode this should not be an issue. */
7503 h->nal_unit_type = hx->nal_unit_type;
7504 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still queued when the loop ends. */
7510 execute_decode_slices(h, context_count);
7515 * returns the number of bytes consumed for building the current frame
/* Clamps the consumed position: never 0 (would stall the caller's loop)
 * and snapped to buf_size when within 10 bytes of the end.
 * NOTE(review): the return statement is missing from this truncated chunk. */
7517 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7518 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7519 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Top-level AVCodec decode callback: parse extradata (avcC) on first use,
 * decode the NAL units of one packet, then reorder/output delayed pictures.
 * NOTE(review): chunk is truncated — many interior lines (braces, returns,
 * else branches, declarations) are missing; verify against the full file.
 */
7524 static int decode_frame(AVCodecContext *avctx,
7525 void *data, int *data_size,
7526 const uint8_t *buf, int buf_size)
7528 H264Context *h = avctx->priv_data;
7529 MpegEncContext *s = &h->s;
7530 AVFrame *pict = data;
7533 s->flags= avctx->flags;
7534 s->flags2= avctx->flags2;
7536 /* end of stream, output what is still in the buffers */
7537 if (buf_size == 0) {
7541 //FIXME factorize this with the output code below
/* Pick the lowest-poc delayed picture (stop at a poc-0/keyframe boundary). */
7542 out = h->delayed_pic[0];
7544 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7545 if(h->delayed_pic[i]->poc < out->poc){
7546 out = h->delayed_pic[i];
/* Shift the chosen picture out of the delayed queue. */
7550 for(i=out_idx; h->delayed_pic[i]; i++)
7551 h->delayed_pic[i] = h->delayed_pic[i+1];
7554 *data_size = sizeof(AVFrame);
7555 *pict= *(AVFrame*)out;
/* First call with avcC extradata: parse SPS/PPS out of it. */
7561 if(h->is_avc && !h->got_avcC) {
7562 int i, cnt, nalsize;
7563 unsigned char *p = avctx->extradata;
7564 if(avctx->extradata_size < 7) {
7565 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7569 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7572 /* sps and pps in the avcC always have length coded with 2 bytes,
7573 so put a fake nal_length_size = 2 while parsing them */
7574 h->nal_length_size = 2;
7575 // Decode sps from avcC
7576 cnt = *(p+5) & 0x1f; // Number of sps
7578 for (i = 0; i < cnt; i++) {
7579 nalsize = AV_RB16(p) + 2;
7580 if(decode_nal_units(h, p, nalsize) < 0) {
7581 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7586 // Decode pps from avcC
7587 cnt = *(p++); // Number of pps
7588 for (i = 0; i < cnt; i++) {
7589 nalsize = AV_RB16(p) + 2;
7590 if(decode_nal_units(h, p, nalsize) != nalsize) {
7591 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7596 // Now store right nal length size, that will be use to parse all other nals
7597 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7598 // Do not reparse avcC
/* Annex-B extradata (non-avcC): decode it once as ordinary NAL units. */
7602 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7603 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7608 buf_index=decode_nal_units(h, buf, buf_size);
7612 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7613 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7614 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Picture complete (or whole-frame mode): finish it and run reordering. */
7618 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7619 Picture *out = s->current_picture_ptr;
7620 Picture *cur = s->current_picture_ptr;
7621 int i, pics, cross_idr, out_of_order, out_idx;
7625 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7626 s->current_picture_ptr->pict_type= s->pict_type;
7628 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7629 ff_vdpau_h264_set_reference_frames(s);
/* Apply memory-management control ops and roll poc state forward. */
7632 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7633 h->prev_poc_msb= h->poc_msb;
7634 h->prev_poc_lsb= h->poc_lsb;
7636 h->prev_frame_num_offset= h->frame_num_offset;
7637 h->prev_frame_num= h->frame_num;
7639 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7640 ff_vdpau_h264_picture_complete(s);
7643 * FIXME: Error handling code does not seem to support interlaced
7644 * when slices span multiple rows
7645 * The ff_er_add_slice calls don't work right for bottom
7646 * fields; they cause massive erroneous error concealing
7647 * Error marking covers both fields (top and bottom).
7648 * This causes a mismatched s->error_count
7649 * and a bad error table. Further, the error count goes to
7650 * INT_MAX when called for bottom field, because mb_y is
7651 * past end by one (callers fault) and resync_mb_y != 0
7652 * causes problems for the first MB line, too.
7659 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7660 /* Wait for second field. */
7664 cur->repeat_pict = 0;
7666 /* Signal interlacing information externally. */
7667 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7668 if(h->sps.pic_struct_present_flag){
7669 switch (h->sei_pic_struct)
7671 case SEI_PIC_STRUCT_FRAME:
7672 cur->interlaced_frame = 0;
7674 case SEI_PIC_STRUCT_TOP_FIELD:
7675 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7676 case SEI_PIC_STRUCT_TOP_BOTTOM:
7677 case SEI_PIC_STRUCT_BOTTOM_TOP:
7678 cur->interlaced_frame = 1;
7680 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7681 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7682 // Signal the possibility of telecined film externally (pic_struct 5,6)
7683 // From these hints, let the applications decide if they apply deinterlacing.
7684 cur->repeat_pict = 1;
7685 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7687 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7688 // Force progressive here, as doubling interlaced frame is a bad idea.
7689 cur->interlaced_frame = 0;
7690 cur->repeat_pict = 2;
7692 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7693 cur->interlaced_frame = 0;
7694 cur->repeat_pict = 4;
7698 /* Derive interlacing flag from used decoding process. */
7699 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7702 if (cur->field_poc[0] != cur->field_poc[1]){
7703 /* Derive top_field_first from field pocs. */
7704 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7706 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7707 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7708 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7709 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7710 cur->top_field_first = 1;
7712 cur->top_field_first = 0;
7714 /* Most likely progressive */
7715 cur->top_field_first = 0;
7719 //FIXME do something with unavailable reference frames
7721 /* Sort B-frames into display order */
7723 if(h->sps.bitstream_restriction_flag
7724 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7725 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7729 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7730 && !h->sps.bitstream_restriction_flag){
7731 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
/* Append the current picture to the delayed-output queue. */
7736 while(h->delayed_pic[pics]) pics++;
7738 assert(pics <= MAX_DELAYED_PIC_COUNT);
7740 h->delayed_pic[pics++] = cur;
7741 if(cur->reference == 0)
7742 cur->reference = DELAYED_PIC_REF;
/* Select the lowest-poc delayed picture as the output candidate. */
7744 out = h->delayed_pic[0];
7746 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7747 if(h->delayed_pic[i]->poc < out->poc){
7748 out = h->delayed_pic[i];
7751 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7753 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames when poc ordering proves more reordering depth. */
7755 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7757 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7759 ((!cross_idr && out->poc > h->outputed_poc + 2)
7760 || cur->pict_type == FF_B_TYPE)))
7763 s->avctx->has_b_frames++;
/* Emit (or drop, if out-of-order) the candidate once the queue is full. */
7766 if(out_of_order || pics > s->avctx->has_b_frames){
7767 out->reference &= ~DELAYED_PIC_REF;
7768 for(i=out_idx; h->delayed_pic[i]; i++)
7769 h->delayed_pic[i] = h->delayed_pic[i+1];
7771 if(!out_of_order && pics > s->avctx->has_b_frames){
7772 *data_size = sizeof(AVFrame);
7774 h->outputed_poc = out->poc;
7775 *pict= *(AVFrame*)out;
7777 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7782 assert(pict->data[0] || !*data_size);
7783 ff_print_debug_info(s, pict);
7784 //printf("out %d\n", (int)pict->data[0]);
7787 /* Return the Picture timestamp as the frame number */
7788 /* we subtract 1 because it is added on utils.c */
7789 avctx->frame_number = s->picture_number - 1;
7791 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fill h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor counts as available only if it belongs to the
 * same slice (slice_table entry matches slice_num).
 * Indices: 0=top-left, 1=top, 2=top-right, 3=left; 4/5 are constants.
 * NOTE(review): truncated — the branch guarding the top row (mb_y check)
 * and the closing brace are missing from this chunk.
 */
7794 static inline void fill_mb_avail(H264Context *h){
7795 MpegEncContext * const s = &h->s;
7796 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7799 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7800 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7801 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7807 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7808 h->mb_avail[4]= 1; //FIXME move out
7809 h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test harness fragments (presumably compiled under #ifdef TEST —
 * the guard and the main() opening are not visible in this truncated
 * chunk). Exercises: exp-Golomb read/write round-trips, the 4x4 (I)DCT,
 * the quantizer, and NAL escape/unescape round-trips.
 */
7817 #define SIZE (COUNT*40)
7823 // int int_temp[10000];
7825 AVCodecContext avctx;
7827 dsputil_init(&dsp, &avctx);
/* Round-trip test: write 0..COUNT-1 as unsigned exp-Golomb, read back. */
7829 init_put_bits(&pb, temp, SIZE);
7830 printf("testing unsigned exp golomb\n");
7831 for(i=0; i<COUNT; i++){
7833 set_ue_golomb(&pb, i);
7834 STOP_TIMER("set_ue_golomb");
7836 flush_put_bits(&pb);
7838 init_get_bits(&gb, temp, 8*SIZE);
7839 for(i=0; i<COUNT; i++){
7842 s= show_bits(&gb, 24);
7845 j= get_ue_golomb(&gb);
7847 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7850 STOP_TIMER("get_ue_golomb");
/* Same round-trip for signed exp-Golomb, centered around zero. */
7854 init_put_bits(&pb, temp, SIZE);
7855 printf("testing signed exp golomb\n");
7856 for(i=0; i<COUNT; i++){
7858 set_se_golomb(&pb, i - COUNT/2);
7859 STOP_TIMER("set_se_golomb");
7861 flush_put_bits(&pb);
7863 init_get_bits(&gb, temp, 8*SIZE);
7864 for(i=0; i<COUNT; i++){
7867 s= show_bits(&gb, 24);
7870 j= get_se_golomb(&gb);
7871 if(j != i - COUNT/2){
7872 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7875 STOP_TIMER("get_se_golomb");
/* DCT test: diff-DCT random blocks, rescale, IDCT-add, measure error. */
7879 printf("testing 4x4 (I)DCT\n");
7882 uint8_t src[16], ref[16];
7883 uint64_t error= 0, max_error=0;
7885 for(i=0; i<COUNT; i++){
7887 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7888 for(j=0; j<16; j++){
7889 ref[j]= random()%255;
7890 src[j]= random()%255;
7893 h264_diff_dct_c(block, src, ref, 4);
7896 for(j=0; j<16; j++){
7897 // printf("%d ", block[j]);
7898 block[j]= block[j]*4;
7899 if(j&1) block[j]= (block[j]*4 + 2)/5;
7900 if(j&4) block[j]= (block[j]*4 + 2)/5;
7904 s->dsp.h264_idct_add(ref, block, 4);
7905 /* for(j=0; j<16; j++){
7906 printf("%d ", ref[j]);
7910 for(j=0; j<16; j++){
7911 int diff= FFABS(src[j] - ref[j]);
7914 max_error= FFMAX(max_error, diff);
7917 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7918 printf("testing quantizer\n");
7919 for(qp=0; qp<52; qp++){
7921 src1_block[i]= src2_block[i]= random()%255;
/* NAL test: encode a random bitstream (with forced zero runs) and check
 * decode_nal() recovers it exactly with the expected consumed length. */
7924 printf("Testing NAL layer\n");
7926 uint8_t bitstream[COUNT];
7927 uint8_t nal[COUNT*2];
7929 memset(&h, 0, sizeof(H264Context));
7931 for(i=0; i<COUNT; i++){
7939 for(j=0; j<COUNT; j++){
7940 bitstream[j]= (random() % 255) + 1;
7943 for(j=0; j<zeros; j++){
7944 int pos= random() % COUNT;
7945 while(bitstream[pos] == 0){
7954 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7956 printf("encoding failed\n");
7960 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7964 if(out_length != COUNT){
7965 printf("incorrect length %d %d\n", out_length, COUNT);
7969 if(consumed != nal_length){
7970 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7974 if(memcmp(bitstream, out, COUNT)){
7975 printf("mismatch\n");
7981 printf("Testing RBSP\n");
/*
 * Codec close callback: free RBSP unescape buffers, per-frame tables and
 * all cached SPS/PPS parameter sets.
 * NOTE(review): truncated — the return statement and closing brace are
 * missing from this chunk.
 */
7989 static av_cold int decode_end(AVCodecContext *avctx)
7991 H264Context *h = avctx->priv_data;
7992 MpegEncContext *s = &h->s;
7995 av_freep(&h->rbsp_buffer[0]);
7996 av_freep(&h->rbsp_buffer[1]);
7997 free_tables(h); //FIXME cleanup init stuff perhaps
7999 for(i = 0; i < MAX_SPS_COUNT; i++)
8000 av_freep(h->sps_buffers + i);
8002 for(i = 0; i < MAX_PPS_COUNT; i++)
8003 av_freep(h->pps_buffers + i);
8007 // memset(h, 0, sizeof(H264Context));
/*
 * Codec registration structs. Positional fields (name, type, codec id,
 * init/close/decode callbacks) are missing from this truncated chunk;
 * only priv_data_size, capabilities and .long_name are visible.
 */
8013 AVCodec h264_decoder = {
8017 sizeof(H264Context),
8022 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8024 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
/* VDPAU-accelerated variant, only built when configured in. */
8027 #if CONFIG_H264_VDPAU_DECODER
8028 AVCodec h264_vdpau_decoder = {
8032 sizeof(H264Context),
8037 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8039 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8043 #if CONFIG_SVQ3_DECODER