2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "x86/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* Statically allocated VLC tables for CAVLC residual decoding. Each VLC gets
 * a fixed backing array (VLC_TYPE ...[N][2]); the matching *_size constant
 * records the number of entries available in that backing storage. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in this file (the svq3_*
 * entries suggest the SVQ3 decoder shares code here — confirm in full file). */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* pack16to32(): packs two 16-bit halves (a, b) into one 32-bit word; the
 * big-endian branch swaps the halves relative to the little-endian one so
 * the pair can be stored with a single 32-bit write in a fixed byte order.
 * NOTE(review): the #else/#endif and the closing brace of this function are
 * missing from this extract — confirm against the full file. */
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
/* little-endian variant (presumably under the matching #else) */
85 return (a&0xFFFF) + (b<<16);
/* rem6[qp] == qp % 6 for qp in 0..51 — lookup avoids a runtime division. */
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* div6[qp] == qp / 6 for qp in 0..51. */
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-neighbour-configuration scan orders for the left-neighbour sub-blocks,
 * indexed by fill_caches() below (see left_block_options[0..3] uses there).
 * NOTE(review): the initializer rows are missing from this extract. */
97 static const int left_block_options[4][8]={
/* Direct lookup table for CAVLC level codes: 7 suffix lengths x 2^8 bit
 * patterns -> (value, length) pairs. */
104 #define LEVEL_TAB_BITS 8
105 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/* fill_caches(): loads neighbour context for the current macroblock into the
 * per-MB caches: intra4x4 prediction modes, non-zero-count, cbp, mv/ref/mvd
 * caches and the B-slice direct-mode cache, reading from the top, left,
 * top-left and top-right neighbouring macroblocks. for_deblock selects a
 * reduced fill used by the loop filter.
 * NOTE(review): many interior lines of this function are missing from this
 * extract; the comments below describe only what is visible. */
107 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
108 MpegEncContext * const s = &h->s;
109 const int mb_xy= h->mb_xy;
110 int topleft_xy, top_xy, topright_xy, left_xy[2];
111 int topleft_type, top_type, topright_type, left_type[2];
112 const int * left_block;
113 int topleft_partition= -1;
116 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
118 //FIXME deblocking could skip the intra and nnz parts.
119 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
122 /* Wow, what a mess, why didn't they simplify the interlacing & intra
123 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: simple 2D grid adjacency. */
125 topleft_xy = top_xy - 1;
126 topright_xy= top_xy + 1;
127 left_xy[1] = left_xy[0] = mb_xy-1;
128 left_block = left_block_options[0];
/* MBAFF: neighbours are derived from the enclosing macroblock pair and the
 * field/frame coding flag of each neighbouring pair. */
130 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
131 const int top_pair_xy = pair_xy - s->mb_stride;
132 const int topleft_pair_xy = top_pair_xy - 1;
133 const int topright_pair_xy = top_pair_xy + 1;
134 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
135 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
136 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
137 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
138 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
139 const int bottom = (s->mb_y & 1);
140 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
142 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
143 top_xy -= s->mb_stride;
145 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
146 topleft_xy -= s->mb_stride;
147 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
148 topleft_xy += s->mb_stride;
149 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
150 topleft_partition = 0;
152 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
153 topright_xy -= s->mb_stride;
155 if (left_mb_field_flag != curr_mb_field_flag) {
156 left_xy[1] = left_xy[0] = pair_xy - 1;
157 if (curr_mb_field_flag) {
158 left_xy[1] += s->mb_stride;
159 left_block = left_block_options[3];
161 left_block= left_block_options[2 - bottom];
/* Publish the resolved neighbour indices for later users (e.g. the loop
 * filter and fetch_diagonal_mv). */
166 h->top_mb_xy = top_xy;
167 h->left_mb_xy[0] = left_xy[0];
168 h->left_mb_xy[1] = left_xy[1];
/* NOTE(review): this branch tests slice_table < 0xFFFF instead of
 * == slice_num as below — presumably the deblock fill also accepts
 * neighbours from other slices; confirm against the full file. */
172 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
173 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
174 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
176 if(MB_MBAFF && !IS_INTRA(mb_type)){
178 for(list=0; list<h->list_count; list++){
179 //These values where changed for ease of performing MC, we need to change them back
180 //FIXME maybe we can make MC and loop filter use the same values or prevent
181 //the MC code from changing ref_cache and rather use a temporary array.
182 if(USES_LIST(mb_type,list)){
183 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
184 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
185 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
187 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Normal (decode-path) neighbour types: a neighbour only counts if it
 * belongs to the same slice as the current MB. */
193 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
194 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
195 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
196 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
197 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra sample availability: bitmasks describing which neighbouring samples
 * may be used for intra prediction; with constrained_intra_pred only intra
 * neighbours qualify (type_mask). */
199 if(IS_INTRA(mb_type)){
200 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
201 h->topleft_samples_available=
202 h->top_samples_available=
203 h->left_samples_available= 0xFFFF;
204 h->topright_samples_available= 0xEEEA;
206 if(!(top_type & type_mask)){
207 h->topleft_samples_available= 0xB3FF;
208 h->top_samples_available= 0x33FF;
209 h->topright_samples_available= 0x26EA;
211 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
212 if(IS_INTERLACED(mb_type)){
213 if(!(left_type[0] & type_mask)){
214 h->topleft_samples_available&= 0xDFFF;
215 h->left_samples_available&= 0x5FFF;
217 if(!(left_type[1] & type_mask)){
218 h->topleft_samples_available&= 0xFF5F;
219 h->left_samples_available&= 0xFF5F;
222 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
223 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
224 assert(left_xy[0] == left_xy[1]);
225 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
226 h->topleft_samples_available&= 0xDF5F;
227 h->left_samples_available&= 0x5F5F;
231 if(!(left_type[0] & type_mask)){
232 h->topleft_samples_available&= 0xDF5F;
233 h->left_samples_available&= 0x5F5F;
237 if(!(topleft_type & type_mask))
238 h->topleft_samples_available&= 0x7FFF;
240 if(!(topright_type & type_mask))
241 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction mode cache: copy neighbour modes, or a fallback
 * value when the neighbour is unavailable / not intra4x4. */
243 if(IS_INTRA4x4(mb_type)){
244 if(IS_INTRA4x4(top_type)){
245 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
246 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
247 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
248 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
251 if(!(top_type & type_mask))
256 h->intra4x4_pred_mode_cache[4+8*0]=
257 h->intra4x4_pred_mode_cache[5+8*0]=
258 h->intra4x4_pred_mode_cache[6+8*0]=
259 h->intra4x4_pred_mode_cache[7+8*0]= pred;
262 if(IS_INTRA4x4(left_type[i])){
263 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
264 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
267 if(!(left_type[i] & type_mask))
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: neighbour coefficient counts feed CAVLC/CABAC
 * context selection; 64 marks "unavailable" for CAVLC, 0 for CABAC inter. */
289 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
291 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
292 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
293 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
294 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
296 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
297 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
299 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
300 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
303 h->non_zero_count_cache[4+8*0]=
304 h->non_zero_count_cache[5+8*0]=
305 h->non_zero_count_cache[6+8*0]=
306 h->non_zero_count_cache[7+8*0]=
308 h->non_zero_count_cache[1+8*0]=
309 h->non_zero_count_cache[2+8*0]=
311 h->non_zero_count_cache[1+8*3]=
312 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
316 for (i=0; i<2; i++) {
318 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
319 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
320 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
321 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
323 h->non_zero_count_cache[3+8*1 + 2*8*i]=
324 h->non_zero_count_cache[3+8*2 + 2*8*i]=
325 h->non_zero_count_cache[0+8*1 + 8*i]=
326 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour cbp for CABAC context modelling. */
333 h->top_cbp = h->cbp_table[top_xy];
334 } else if(IS_INTRA(mb_type)) {
341 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
342 } else if(IS_INTRA(mb_type)) {
348 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
351 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Motion vector / reference caches for inter (and direct) macroblocks:
 * copy mv and ref_index rows of the top / left / corner neighbours, or
 * 0 + LIST_NOT_USED / PART_NOT_AVAILABLE markers when absent. */
356 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
358 for(list=0; list<h->list_count; list++){
359 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
360 /*if(!h->mv_cache_clean[list]){
361 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
362 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
363 h->mv_cache_clean[list]= 1;
367 h->mv_cache_clean[list]= 0;
369 if(USES_LIST(top_type, list)){
370 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
371 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
372 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
373 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
376 h->ref_cache[list][scan8[0] + 0 - 1*8]=
377 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
378 h->ref_cache[list][scan8[0] + 2 - 1*8]=
379 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
381 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
382 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
385 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
389 int cache_idx = scan8[0] - 1 + i*2*8;
390 if(USES_LIST(left_type[i], list)){
391 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
392 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
393 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
394 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
395 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
396 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
398 *(uint32_t*)h->mv_cache [list][cache_idx ]=
399 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
400 h->ref_cache[list][cache_idx ]=
401 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
405 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
408 if(USES_LIST(topleft_type, list)){
/* topleft_partition (set in the MBAFF path above) shifts the top-left
 * sample point when the left pair is field-coded. */
409 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
410 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
411 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
412 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
418 if(USES_LIST(topright_type, list)){
419 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
420 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
421 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
422 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
424 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
428 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
431 h->ref_cache[list][scan8[5 ]+1] =
432 h->ref_cache[list][scan8[7 ]+1] =
433 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
434 h->ref_cache[list][scan8[4 ]] =
435 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
436 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
437 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
438 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
439 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
440 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* Motion vector difference cache (used by CABAC mvd context modelling). */
443 /* XXX beurk, Load mvd */
444 if(USES_LIST(top_type, list)){
445 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
446 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
447 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
451 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
452 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
456 if(USES_LIST(left_type[0], list)){
457 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
458 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
459 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
461 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
464 if(USES_LIST(left_type[1], list)){
465 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
466 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
469 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
472 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
473 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
474 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
475 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
476 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* Direct-mode flag cache for B slices: one flag per 8x8 partition. */
478 if(h->slice_type_nos == FF_B_TYPE){
479 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
481 if(IS_DIRECT(top_type)){
482 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
483 }else if(IS_8X8(top_type)){
484 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
485 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
486 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
491 if(IS_DIRECT(left_type[0]))
492 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
493 else if(IS_8X8(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
496 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
498 if(IS_DIRECT(left_type[1]))
499 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
500 else if(IS_8X8(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
503 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of the cached refs/mvs: the MAP_F2F macro
 * below is instantiated twice — once halving the vertical mv (frame->field)
 * and once doubling it (field->frame) — over all cached neighbour slots. */
509 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
510 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
511 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
512 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
515 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
516 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
517 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
518 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
520 #define MAP_F2F(idx, mb_type)\
521 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
522 h->ref_cache[list][idx] <<= 1;\
523 h->mv_cache[list][idx][1] /= 2;\
524 h->mvd_cache[list][idx][1] /= 2;\
529 #define MAP_F2F(idx, mb_type)\
530 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] >>= 1;\
532 h->mv_cache[list][idx][1] <<= 1;\
533 h->mvd_cache[list][idx][1] <<= 1;\
/* 8x8 DCT neighbour count for the transform-size CABAC context. */
543 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* write_back_intra_pred_mode(): stores seven cached intra4x4 prediction
 * modes of the current MB from intra4x4_pred_mode_cache back into the
 * per-MB table h->intra4x4_pred_mode[mb_xy][0..6].
 * NOTE(review): the closing brace is missing from this extract. */
546 static inline void write_back_intra_pred_mode(H264Context *h){
547 const int mb_xy= h->mb_xy;
549 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
550 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
551 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
552 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
553 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
554 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
555 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
559 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns the (possibly remapped) mode, or an error when a mode needs an
 * unavailable neighbour and has no DC fallback (the top[]/left[] tables map
 * each mode to its substitute; -1 entries are the error cases).
 * NOTE(review): several lines, including the return statements, are missing
 * from this extract. */
561 static inline int check_intra4x4_pred_mode(H264Context *h){
562 MpegEncContext * const s = &h->s;
563 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
564 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
567 if(!(h->top_samples_available&0x8000)){
569 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
574 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
579 if((h->left_samples_available&0x8888)!=0x8888){
580 static const int mask[4]={0x8000,0x2000,0x80,0x20};
582 if(!(h->left_samples_available&mask[i])){
583 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
585 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
588 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
595 } //FIXME cleanup like next
598 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Same idea as check_intra4x4_pred_mode() but for whole-MB (16x16 / chroma
 * 8x8) prediction modes; returns the remapped mode or an error.
 * NOTE(review): several lines, including the return statements, are missing
 * from this extract. */
600 static inline int check_intra_pred_mode(H264Context *h, int mode){
601 MpegEncContext * const s = &h->s;
602 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
603 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
606 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 if(!(h->top_samples_available&0x8000)){
613 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
618 if((h->left_samples_available&0x8080) != 0x8080){
620 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
621 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
633 * gets the predicted intra4x4 prediction mode.
/* Prediction = min(left, top) cached mode; DC_PRED when either neighbour is
 * unavailable (negative). NOTE(review): the final return of min and the
 * closing brace are missing from this extract. */
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
/* write_back_non_zero_count(): stores the cached per-4x4-block non-zero
 * coefficient counts back into the per-MB table (luma entries 0..7,
 * chroma entries 8..12), mirroring the cache layout used by fill_caches().
 * NOTE(review): the closing brace is missing from this extract. */
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
668 * gets the predicted number of non-zero coefficients.
669 * @param n block index
/* Averages the left and top neighbour nnz counts (rounded up) to select the
 * CAVLC coeff_token table. NOTE(review): the computation of i from left/top
 * and the final return are missing from this extract. */
671 static inline int pred_non_zero_count(H264Context *h, int n){
672 const int index8= scan8[n];
673 const int left= h->non_zero_count_cache[index8 - 1];
674 const int top = h->non_zero_count_cache[index8 - 8];
677 if(i<64) i= (i+1)>>1;
679 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* fetch_diagonal_mv(): returns the reference index of the top-right (C)
 * neighbour and points *C at its motion vector, falling back to the
 * top-left neighbour when top-right is unavailable. In MBAFF frames it may
 * have to rescale a frame/field-mismatched neighbour mv on the fly via the
 * SET_DIAG_MV macro, which writes a temporary mv into mv_cache[scan8[0]-2].
 * NOTE(review): several lines (closing braces, a return, the availability
 * condition at line 697's start) are missing from this extract. */
684 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
685 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
686 MpegEncContext *s = &h->s;
688 /* there is no consistent mapping of mvs to neighboring locations that will
689 * make mbaff happy, so we can't move all this logic to fill_caches */
691 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
693 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
694 *C = h->mv_cache[list][scan8[0]-2];
697 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
698 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
699 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV: fetch (mv, ref) at 4x4 position (x4, y4), applying MV_OP to
 * the vertical mv component and REF_OP to the ref index to convert between
 * frame and field units. */
700 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
701 const int x4 = X4, y4 = Y4;\
702 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
703 if(!USES_LIST(mb_type,list))\
704 return LIST_NOT_USED;\
705 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
706 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
707 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
708 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
710 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
713 if(topright_ref == PART_NOT_AVAILABLE
714 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
715 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
717 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
718 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
721 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
723 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
724 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF / already-consistent case: take C from the cache directly, or
 * substitute the top-left neighbour when top-right is unavailable. */
730 if(topright_ref != PART_NOT_AVAILABLE){
731 *C= h->mv_cache[list][ i - 8 + part_width ];
734 tprintf(s->avctx, "topright MV not available\n");
736 *C= h->mv_cache[list][ i - 8 - 1 ];
737 return h->ref_cache[list][ i - 8 - 1 ];
742 * gets the predicted MV.
743 * @param n the block index
744 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
745 * @param mx the x component of the predicted motion vector
746 * @param my the y component of the predicted motion vector
/* Median prediction from neighbours A (left), B (top), C (diagonal): the
 * median of the three mvs when two or more neighbours share the target ref,
 * otherwise the single matching neighbour's mv, otherwise the median again.
 * NOTE(review): the match_count==1 branches and the final assignments are
 * partly missing from this extract. */
748 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
749 const int index8= scan8[n];
750 const int top_ref= h->ref_cache[list][ index8 - 8 ];
751 const int left_ref= h->ref_cache[list][ index8 - 1 ];
752 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
753 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
755 int diagonal_ref, match_count;
757 assert(part_width==1 || part_width==2 || part_width==4);
767 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
768 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
769 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
770 if(match_count > 1){ //most common
771 *mx= mid_pred(A[0], B[0], C[0]);
772 *my= mid_pred(A[1], B[1], C[1]);
773 }else if(match_count==1){
777 }else if(top_ref==ref){
/* No (or ambiguous) ref match: use A alone if only the left neighbour
 * exists, else fall back to the median. */
785 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= mid_pred(A[0], B[0], C[0]);
790 *my= mid_pred(A[1], B[1], C[1]);
794 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
798 * gets the directionally predicted 16x8 MV.
799 * @param n the block index
800 * @param mx the x component of the predicted motion vector
801 * @param my the y component of the predicted motion vector
/* 16x8 partitions prefer a single directional neighbour (top for the upper
 * partition, left for the lower) when its ref matches; otherwise falls back
 * to the generic median predictor pred_motion().
 * NOTE(review): the branch structure and ref-match tests around these lines
 * are missing from this extract. */
803 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
805 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
806 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
808 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
816 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
817 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
819 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* fallback: generic median prediction */
829 pred_motion(h, n, 4, list, ref, mx, my);
833 * gets the directionally predicted 8x16 MV.
834 * @param n the block index
835 * @param mx the x component of the predicted motion vector
836 * @param my the y component of the predicted motion vector
/* 8x16 partitions prefer the left neighbour for the left partition and the
 * diagonal (top-right) neighbour for the right partition when the ref
 * matches; otherwise falls back to pred_motion().
 * NOTE(review): the branch structure around these lines is missing from
 * this extract. */
838 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
840 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
841 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
856 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
858 if(diagonal_ref == ref){
/* fallback: generic median prediction */
866 pred_motion(h, n, 2, list, ref, mx, my);
/* pred_pskip_motion(): MV prediction for P-skip macroblocks. The mv is
 * forced to (0,0) when either neighbour is unavailable or a neighbour has
 * ref 0 with a zero mv; otherwise the normal median prediction applies.
 * NOTE(review): the zero-mv assignment inside the special case is missing
 * from this extract. */
869 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
870 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
871 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
873 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
875 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
876 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
877 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
883 pred_motion(h, 0, 4, 0, 0, mx, my);
/* get_scale_factor(): temporal-direct distance scale factor for reference i
 * of list 0: clip(((tb*tx + 32) >> 6)) with tx = (16384 + |td|/2) / td,
 * where td/tb are the clipped POC distances.
 * NOTE(review): the early-return value for td==0 / long-term refs is
 * missing from this extract. */
888 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
889 int poc0 = h->ref_list[0][i].poc;
890 int td = av_clip(poc1 - poc0, -128, 127);
891 if(td == 0 || h->ref_list[0][i].long_ref){
894 int tb = av_clip(poc - poc0, -128, 127);
895 int tx = (16384 + (FFABS(td) >> 1)) / td;
896 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* direct_dist_scale_factor(): precomputes the per-reference dist_scale_factor
 * tables for temporal direct mode — one flat table, plus per-field tables
 * (offset by 16 into the ref list) for MBAFF/field decoding.
 * NOTE(review): the branch structure enclosing the field loop is missing
 * from this extract. */
900 static inline void direct_dist_scale_factor(H264Context * const h){
901 MpegEncContext * const s = &h->s;
902 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
903 const int poc1 = h->ref_list[1][0].poc;
905 for(field=0; field<2; field++){
906 const int poc = h->s.current_picture_ptr->field_poc[field];
907 const int poc1 = h->ref_list[1][0].field_poc[field];
908 for(i=0; i < 2*h->ref_count[0]; i++)
909 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* fill_colmap(): builds the co-located-reference -> list-0 index map used by
 * temporal direct mode: for each reference of the co-located picture
 * (ref_list[1][0]), finds the entry in our own list with the same
 * 4*frame_num + reference "poc" key. mbafi selects the field (offset-16)
 * variant of the map.
 * NOTE(review): some interior lines (e.g. the field POC adjustments before
 * line 934 and loop closers) are missing from this extract. */
917 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
918 MpegEncContext * const s = &h->s;
919 Picture * const ref1 = &h->ref_list[1][0];
920 int j, old_ref, rfield;
921 int start= mbafi ? 16 : 0;
922 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
923 int interl= mbafi || s->picture_structure != PICT_FRAME;
925 /* bogus; fills in for missing frames */
926 memset(map[list], 0, sizeof(map[list]));
928 for(rfield=0; rfield<2; rfield++){
929 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
930 int poc = ref1->ref_poc[colfield][list][old_ref];
934 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
935 poc= (poc&~3) + rfield + 1;
937 for(j=start; j<end; j++){
938 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
939 int cur_ref= mbafi ? (j-16)^field : j;
940 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
942 map[list][old_ref] = cur_ref;
/**
 * Records this picture's reference metadata (ref_count/ref_poc keyed as
 * 4*frame_num + parity) on the current Picture, then — for B slices using
 * temporal direct — builds the col-to-list0 maps via fill_colmap() for both
 * the frame/field case and each MBAFF field. Some interior lines (early
 * return for non-temporal-direct slices, loop variable decls) are elided.
 */
950 static inline void direct_ref_list_init(H264Context * const h){
951 MpegEncContext * const s = &h->s;
952 Picture * const ref1 = &h->ref_list[1][0];
953 Picture * const cur = s->current_picture_ptr;
// sidx/ref1sidx: 0 for frame/top field, 1 for bottom field storage slot.
955 int sidx= (s->picture_structure&1)^1;
956 int ref1sidx= (ref1->reference&1)^1;
958 for(list=0; list<2; list++){
959 cur->ref_count[sidx][list] = h->ref_count[list];
960 for(j=0; j<h->ref_count[list]; j++)
961 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
// Frame pictures duplicate the data into both field slots.
964 if(s->picture_structure == PICT_FRAME){
965 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
966 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
969 cur->mbaff= FRAME_MBAFF;
// Maps are only needed for B slices with temporal direct prediction.
971 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
974 for(list=0; list<2; list++){
975 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
976 for(field=0; field<2; field++)
977 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/**
 * Predicts motion for a B-direct macroblock (spatial or temporal mode),
 * filling h->mv_cache / h->ref_cache / h->sub_mb_type and updating *mb_type.
 * The first section resolves the co-located macroblock in ref_list[1][0]
 * across the frame/field (AFF) cases and chooses sub_mb_type; then either
 * the spatial branch (neighbor-min reference + pred_motion, zeroing MVs for
 * near-static co-located blocks) or the temporal branch (POC-distance MV
 * scaling via dist_scale_factor and map_col_to_list0) runs.
 * NOTE(review): many interior lines are elided in this view (declarations,
 * else branches, closing braces); comments below describe only the visible code.
 */
981 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
982 MpegEncContext * const s = &h->s;
983 int b8_stride = h->b8_stride;
984 int b4_stride = h->b_stride;
985 int mb_xy = h->mb_xy;
987 const int16_t (*l1mv0)[2], (*l1mv1)[2];
988 const int8_t *l1ref0, *l1ref1;
989 const int is_b8x8 = IS_8X8(*mb_type);
990 unsigned int sub_mb_type;
993 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- locate the co-located MB, translating between frame and field coding --- */
995 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
996 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
997 int cur_poc = s->current_picture_ptr->poc;
998 int *col_poc = h->ref_list[1]->field_poc;
// Pick the co-located field whose POC is closer to the current picture.
999 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1000 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1003 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1004 mb_xy += s->mb_stride*fieldoff;
1007 }else{ // AFL/AFR/FR/FL -> AFR/FR
1008 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
// A field MB covers two stacked frame MBs in the co-located frame picture.
1009 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1010 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1011 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1014 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1015 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1016 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1019 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1022 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024 }else{ // AFR/FR -> AFR/FR
1027 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1028 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1029 /* FIXME save sub mb types from previous frames (or derive from MVs)
1030 * so we know exactly what block size to use */
1031 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1032 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1034 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1035 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1038 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* --- pointers into the co-located picture's motion vectors / ref indices --- */
1043 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1044 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1045 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1046 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
// Step to the bottom-half rows of the co-located MB (condition elided in this view).
1049 l1ref0 += h->b8_stride;
1050 l1ref1 += h->b8_stride;
1051 l1mv0 += 2*b4_stride;
1052 l1mv1 += 2*b4_stride;
/* ============================ spatial direct ============================ */
1056 if(h->direct_spatial_mv_pred){
1061 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063 /* ref = min(neighbors) */
1064 for(list=0; list<2; list++){
1065 int refa = h->ref_cache[list][scan8[0] - 1];
1066 int refb = h->ref_cache[list][scan8[0] - 8];
1067 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1068 if(refc == PART_NOT_AVAILABLE)
1069 refc = h->ref_cache[list][scan8[0] - 8 - 1];
// Unsigned compare makes negative (unavailable) refs largest, so they lose the min.
1070 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1075 if(ref[0] < 0 && ref[1] < 0){
// No valid neighbor references: direct mode falls back to ref 0, zero MV.
1076 ref[0] = ref[1] = 0;
1077 mv[0][0] = mv[0][1] =
1078 mv[1][0] = mv[1][1] = 0;
1080 for(list=0; list<2; list++){
1082 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1084 mv[list][0] = mv[list][1] = 0;
// A list with no valid reference is dropped from the (sub-)MB type.
1090 *mb_type &= ~MB_TYPE_L1;
1091 sub_mb_type &= ~MB_TYPE_L1;
1092 }else if(ref[0] < 0){
1094 *mb_type &= ~MB_TYPE_L0;
1095 sub_mb_type &= ~MB_TYPE_L0;
// Mixed frame/field vs co-located coding: work per 8x8 with remapped col positions.
1098 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1099 for(i8=0; i8<4; i8++){
1102 int xy8 = x8+y8*b8_stride;
1103 int xy4 = 3*x8+y8*b4_stride;
1106 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108 h->sub_mb_type[i8] = sub_mb_type;
1110 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1111 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Co-located block references index 0 with a near-zero MV => zero out our MVs (spec "colZeroFlag").
1112 if(!IS_INTRA(mb_type_col[y8])
1113 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1114 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1116 a= pack16to32(mv[0][0],mv[0][1]);
1118 b= pack16to32(mv[1][0],mv[1][1]);
1120 a= pack16to32(mv[0][0],mv[0][1]);
1121 b= pack16to32(mv[1][0],mv[1][1]);
1123 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126 }else if(IS_16X16(*mb_type)){
1129 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1130 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1131 if(!IS_INTRA(mb_type_col[0])
1132 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1133 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264 builds <= 33 had a different list1 handling; gate the workaround on the build id.
1134 && (h->x264_build>33 || !h->x264_build)))){
1136 a= pack16to32(mv[0][0],mv[0][1]);
1138 b= pack16to32(mv[1][0],mv[1][1]);
1140 a= pack16to32(mv[0][0],mv[0][1]);
1141 b= pack16to32(mv[1][0],mv[1][1]);
1143 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1144 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146 for(i8=0; i8<4; i8++){
1147 const int x8 = i8&1;
1148 const int y8 = i8>>1;
1150 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152 h->sub_mb_type[i8] = sub_mb_type;
1154 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1155 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1156 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1157 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1160 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1161 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1162 && (h->x264_build>33 || !h->x264_build)))){
1163 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1164 if(IS_SUB_8X8(sub_mb_type)){
1165 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1166 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1173 for(i4=0; i4<4; i4++){
1174 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* =========================== temporal direct =========================== */
1185 }else{ /* direct temporal mv pred */
1186 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1187 const int *dist_scale_factor = h->dist_scale_factor;
// Field MBs in MBAFF frames use the per-field maps and scale factors.
1190 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1191 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1192 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1193 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1195 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1198 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1199 /* FIXME assumes direct_8x8_inference == 1 */
// y_shift compensates the halved vertical MV resolution between frame and field coding.
1200 int y_shift = 2*!IS_INTERLACED(*mb_type);
1202 for(i8=0; i8<4; i8++){
1203 const int x8 = i8&1;
1204 const int y8 = i8>>1;
1206 const int16_t (*l1mv)[2]= l1mv0;
1208 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210 h->sub_mb_type[i8] = sub_mb_type;
1212 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra co-located block: ref 0, zero motion for both lists.
1213 if(IS_INTRA(mb_type_col[y8])){
1214 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1216 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1220 ref0 = l1ref0[x8 + y8*b8_stride];
1222 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1224 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1227 scale = dist_scale_factor[ref0];
1228 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1231 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1232 int my_col = (mv_col[1]<<y_shift)/2;
// L0 MV = scaled co-located MV; L1 MV = L0 MV minus co-located MV (spec 8.4.1.2.3).
1233 int mx = (scale * mv_col[0] + 128) >> 8;
1234 int my = (scale * my_col + 128) >> 8;
1235 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1236 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1242 /* one-to-one mv scaling */
1244 if(IS_16X16(*mb_type)){
1247 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1248 if(IS_INTRA(mb_type_col[0])){
1251 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1252 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1253 const int scale = dist_scale_factor[ref0];
1254 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1257 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1260 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1263 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1264 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266 for(i8=0; i8<4; i8++){
1267 const int x8 = i8&1;
1268 const int y8 = i8>>1;
1270 const int16_t (*l1mv)[2]= l1mv0;
1272 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274 h->sub_mb_type[i8] = sub_mb_type;
1275 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1276 if(IS_INTRA(mb_type_col[0])){
1277 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1279 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1283 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285 ref0 = map_col_to_list0[0][ref0];
1287 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1290 scale = dist_scale_factor[ref0];
1292 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1293 if(IS_SUB_8X8(sub_mb_type)){
1294 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1295 int mx = (scale * mv_col[0] + 128) >> 8;
1296 int my = (scale * mv_col[1] + 128) >> 8;
1297 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1298 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300 for(i4=0; i4<4; i4++){
1301 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1302 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1303 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1304 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1305 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1306 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-MB motion caches (mv_cache / ref_cache / mvd_cache /
 * sub_mb_type direct flags) back into the frame-wide arrays of the current
 * picture at this macroblock's position. Rows are copied two MVs (64 bits)
 * at a time. Some interior lines (loop headers, else branches) are elided
 * in this view.
 */
1313 static inline void write_back_motion(H264Context *h, int mb_type){
1314 MpegEncContext * const s = &h->s;
// b_xy: 4x4-block index; b8_xy: 8x8-block index of this MB in the frame arrays.
1315 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1316 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1319 if(!USES_LIST(mb_type, 0))
1320 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1322 for(list=0; list<h->list_count; list++){
1324 if(!USES_LIST(mb_type, list))
// Two packed MVs (8 bytes) per copy; assumes the unaligned-64-bit access is safe here.
1328 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1329 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// MV differences are only needed for CABAC context modelling.
1331 if( h->pps.cabac ) {
1332 if(IS_SKIP(mb_type))
1333 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1336 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1337 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1342 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1343 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1344 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1345 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1346 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// Direct flags per 8x8 are stored for B slices under CABAC (sub_mb_type[0]'s write is elided in this view).
1350 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1351 if(IS_8X8(mb_type)){
1352 uint8_t *direct_table = &h->direct_table[b8_xy];
1353 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1354 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1355 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1361 * Decodes a network abstraction layer unit.
1362 * @param consumed is the number of bytes used as input
1363 * @param length is the length of the array
1364 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1365 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Strips the NAL header byte (setting h->nal_ref_idc / h->nal_unit_type) and
 * removes 00 00 03 emulation-prevention escapes from the payload. Returns a
 * pointer to the unescaped RBSP: src+1 directly when no escape was found,
 * otherwise an internal rbsp_buffer. Several lines (branch bodies, loop
 * headers, returns) are elided in this view.
 */
1367 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1372 // src[0]&0x80; //forbidden bit
1373 h->nal_ref_idc= src[0]>>5;
1374 h->nal_unit_type= src[0]&0x1F;
1378 for(i=0; i<length; i++)
1379 printf("%2X ", src[i]);
/* Fast scan for a 00 00 pair using word-at-a-time zero-byte detection. */
1382 #ifdef HAVE_FAST_UNALIGNED
1383 # ifdef HAVE_FAST_64BIT
1385 for(i=0; i+1<length; i+=9){
// Classic "has zero byte" bit trick on 8 bytes at once.
1386 if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1389 for(i=0; i+1<length; i+=5){
1390 if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1393 if(i>0 && !src[i]) i--;
/* Byte-wise fallback scan. */
1397 for(i=0; i+1<length; i+=2){
1398 if(src[i]) continue;
1399 if(i>0 && src[i-1]==0) i--;
1401 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1403 /* startcode, so we must be past the end */
1411 if(i>=length-1){ //no escaped 0
1412 *dst_length= length;
1413 *consumed= length+1; //+1 for the header
1417 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1418 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1419 dst= h->rbsp_buffer[bufidx];
1425 //printf("decoding esc\n");
// Everything before the first 00 00 xx needs no unescaping; bulk-copy it.
1426 memcpy(dst, src, i);
1429 //remove escapes (very rare 1:2^22)
1431 dst[di++]= src[si++];
1432 dst[di++]= src[si++];
1433 }else if(src[si]==0 && src[si+1]==0){
1434 if(src[si+2]==3){ //escape
1439 }else //next start code
1443 dst[di++]= src[si++];
1446 dst[di++]= src[si++];
// Zero the tail so over-reading bitstream readers see clean padding.
1449 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1452 *consumed= si + 1;//+1 for the header
1453 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1458 * identifies the exact end of the bitstream
1459 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream from the rbsp_stop_one_bit.
 * NOTE(review): the body is almost entirely elided in this view — only the
 * signature and a trace printout are visible; see the full source for the
 * trailing-bit scan and return value.
 */
1461 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1465 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1475 * IDCT transforms the 16 dc values and dequantizes them.
1476 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform of the 16 luma DC coefficients followed by
 * dequantization with qmul. Operates in place on the DC positions of the
 * 16x16 block layout (hence the x_offset/y_offset tables). Column pass goes
 * through temp[]; lines storing temp[] and the loop headers are elided here.
 */
1478 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1481 int temp[16]; //FIXME check if this is a good idea
1482 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1483 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1485 //memset(block, 64, 2*256);
/* Horizontal butterfly pass (results go into temp[], stores elided in this view). */
1488 const int offset= y_offset[i];
1489 const int z0= block[offset+stride*0] + block[offset+stride*4];
1490 const int z1= block[offset+stride*0] - block[offset+stride*4];
1491 const int z2= block[offset+stride*1] - block[offset+stride*5];
1492 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterfly pass + dequant: (x*qmul + 128) >> 8 rounding. */
1501 const int offset= x_offset[i];
1502 const int z0= temp[4*0+i] + temp[4*2+i];
1503 const int z1= temp[4*0+i] - temp[4*2+i];
1504 const int z2= temp[4*1+i] - temp[4*3+i];
1505 const int z3= temp[4*1+i] + temp[4*3+i];
1507 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1508 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1509 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1510 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1516 * DCT transforms the 16 dc values.
1517 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
 * in place over the DC positions of the 16x16 block layout; final values
 * are halved (>>1). Mirror of h264_luma_dc_dequant_idct_c without the
 * dequant step. temp[] stores and loop headers are elided in this view.
 */
1519 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1520 // const int qmul= dequant_coeff[qp][0];
1522 int temp[16]; //FIXME check if this is a good idea
1523 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1524 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Horizontal butterfly pass (stores into temp[] elided in this view). */
1527 const int offset= y_offset[i];
1528 const int z0= block[offset+stride*0] + block[offset+stride*4];
1529 const int z1= block[offset+stride*0] - block[offset+stride*4];
1530 const int z2= block[offset+stride*1] - block[offset+stride*5];
1531 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterfly pass; >>1 matches the H.264 DC transform scaling. */
1540 const int offset= x_offset[i];
1541 const int z0= temp[4*0+i] + temp[4*2+i];
1542 const int z1= temp[4*0+i] - temp[4*2+i];
1543 const int z2= temp[4*1+i] - temp[4*3+i];
1544 const int z3= temp[4*1+i] + temp[4*3+i];
1546 block[stride*0 +offset]= (z0 + z3)>>1;
1547 block[stride*2 +offset]= (z1 + z2)>>1;
1548 block[stride*8 +offset]= (z1 - z2)>>1;
1549 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 Hadamard transform + dequantization of the 4 chroma DC
 * coefficients, in place at their positions in the interleaved chroma block
 * layout (row stride 32, column stride 16). The intermediate e/f terms are
 * computed on lines elided in this view; presumably e=a-b+... sums of a..d —
 * confirm against the full source.
 */
1557 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1558 const int stride= 16*2;
1559 const int xStride= 16;
1562 a= block[stride*0 + xStride*0];
1563 b= block[stride*0 + xStride*1];
1564 c= block[stride*1 + xStride*0];
1565 d= block[stride*1 + xStride*1];
/* 2x2 butterfly combined with dequant; >>7 scaling per the chroma DC path. */
1572 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1573 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1574 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1575 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder side),
 * in place, same layout as chroma_dc_dequant_idct_c. The intermediate e/f
 * terms are computed on lines elided in this view.
 */
1579 static void chroma_dc_dct_c(DCTELEM *block){
1580 const int stride= 16*2;
1581 const int xStride= 16;
1584 a= block[stride*0 + xStride*0];
1585 b= block[stride*0 + xStride*1];
1586 c= block[stride*1 + xStride*0];
1587 d= block[stride*1 + xStride*1];
1594 block[stride*0 + xStride*0]= (a+c);
1595 block[stride*0 + xStride*1]= (e+b);
1596 block[stride*1 + xStride*0]= (a-c);
1597 block[stride*1 + xStride*1]= (e-b);
1602 * gets the chroma qp.
/**
 * Maps a luma qscale to the chroma QP via the PPS lookup table.
 * @param t table index (selects between the two chroma_qp_index_offset tables)
 * @param qscale luma quantizer
 */
1604 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1605 return h->pps.chroma_qp_table[t][qscale];
/**
 * Motion-compensates one partition from one reference picture (one list).
 * Luma uses quarter-pel qpix_op selected by the fractional MV bits; chroma
 * uses eighth-pel chroma_op. Falls back to ff_emulated_edge_mc when the
 * reference block reaches outside the picture. Some lines (emu flag
 * declaration/reset, field-MV condition) are elided in this view.
 */
1608 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1609 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1610 int src_x_offset, int src_y_offset,
1611 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1612 MpegEncContext * const s = &h->s;
// MVs are in quarter-pel units; offsets are in full pel, hence *8.
1613 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1614 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// luma_xy indexes the 16 quarter-pel interpolation variants.
1615 const int luma_xy= (mx&3) + ((my&3)<<2);
1616 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1617 uint8_t * src_cb, * src_cr;
1618 int extra_width= h->emu_edge_width;
1619 int extra_height= h->emu_edge_height;
1621 const int full_mx= mx>>2;
1622 const int full_my= my>>2;
1623 const int pic_width = 16*s->mb_width;
1624 const int pic_height = 16*s->mb_height >> MB_FIELD;
// Sub-pel interpolation reads 2 pixels beyond the block on each side.
1626 if(mx&7) extra_width -= 3;
1627 if(my&7) extra_height -= 3;
1629 if( full_mx < 0-extra_width
1630 || full_my < 0-extra_height
1631 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1632 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1633 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1634 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1638 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// Non-square partitions do the second half at +delta.
1640 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1643 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1646 // chroma offset when predicting from a field of opposite parity
1647 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1648 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1650 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1651 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cb= s->edge_emu_buffer;
1657 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1660 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1661 src_cr= s->edge_emu_buffer;
1663 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition: list-0 prediction with
 * the "put" functions, then (if bi-predicted) list-1 with the "avg"
 * functions averaged on top. The switch to avg for the second direction is
 * partially elided in this view (qpix_op reassignment not visible).
 */
1666 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1667 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1668 int x_offset, int y_offset,
1669 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1670 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1671 int list0, int list1){
1672 MpegEncContext * const s = &h->s;
1673 qpel_mc_func *qpix_op= qpix_put;
1674 h264_chroma_mc_func chroma_op= chroma_put;
// Translate partition offsets (chroma units) to destination pointers and MB coordinates.
1676 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1677 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1678 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1679 x_offset += 8*s->mb_x;
1680 y_offset += 8*(s->mb_y >> MB_FIELD);
1683 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1684 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1685 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1686 qpix_op, chroma_op);
// After list 0 was written, list 1 must average into the destination.
1689 chroma_op= chroma_avg;
1693 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1694 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1695 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1696 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition. Bi-prediction renders the
 * list-1 prediction into the obmc scratchpad and blends it with the list-0
 * prediction using either implicit weights (use_weight==2) or explicit
 * per-reference weights/offsets; uni-prediction applies a single
 * weight/offset in place. Some branch/closing lines are elided in this view.
 */
1700 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1701 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1702 int x_offset, int y_offset,
1703 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1704 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1705 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1706 int list0, int list1){
1707 MpegEncContext * const s = &h->s;
1709 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1710 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1711 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1712 x_offset += 8*s->mb_x;
1713 y_offset += 8*(s->mb_y >> MB_FIELD);
1716 /* don't optimize for luma-only case, since B-frames usually
1717 * use implicit weights => chroma too. */
1718 uint8_t *tmp_cb = s->obmc_scratchpad;
1719 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1720 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1721 int refn0 = h->ref_cache[0][ scan8[n] ];
1722 int refn1 = h->ref_cache[1][ scan8[n] ];
// List 0 goes straight to the destination; list 1 into the scratchpad for blending.
1724 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1725 dest_y, dest_cb, dest_cr,
1726 x_offset, y_offset, qpix_put, chroma_put);
1727 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1728 tmp_y, tmp_cb, tmp_cr,
1729 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weights sum to 64, log2 denom 5, zero offset.
1731 if(h->use_weight == 2){
1732 int weight0 = h->implicit_weight[refn0][refn1];
1733 int weight1 = 64 - weight0;
1734 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1736 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1739 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1740 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1741 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1742 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1743 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1744 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1745 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1746 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Uni-directional: predict, then weight in place. */
1749 int list = list1 ? 1 : 0;
1750 int refn = h->ref_cache[list][ scan8[n] ];
1751 Picture *ref= &h->ref_list[list][refn];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_put, chroma_put);
1756 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1757 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1758 if(h->use_weight_chroma){
1759 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1760 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1761 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches one partition's motion compensation to the weighted or the
 * standard path. Weighted is used for explicit weighting (use_weight==1)
 * or for implicit bi-prediction whose weight differs from the trivial
 * 32/32 split (which plain averaging already implements).
 */
1767 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1768 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1769 int x_offset, int y_offset,
1770 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1771 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1772 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1773 int list0, int list1){
1774 if((h->use_weight==2 && list0 && list1
1775 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1776 || h->use_weight==1)
1777 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1778 x_offset, y_offset, qpix_put, chroma_put,
1779 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1781 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1782 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues cache prefetches into the reference planes at the position the
 * 16x16 MV of this MB points to, shifted ahead, to hide memory latency for
 * upcoming macroblocks. A guard (presumably skipping invalid refn) is
 * elided in this view.
 */
1785 static inline void prefetch_motion(H264Context *h, int list){
1786 /* fetch pixels for estimated mv 4 macroblocks ahead
1787 * optimized for 64byte cache lines */
1788 MpegEncContext * const s = &h->s;
1789 const int refn = h->ref_cache[list][scan8[0]];
1791 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1792 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1793 uint8_t **src= h->ref_list[list][refn].data;
1794 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1795 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr are prefetched together using their plane-pointer distance as stride.
1796 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1797 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for a whole macroblock: walks the partition
 * tree (16x16 / 16x8 / 8x16 / 8x8 with 8x4, 4x8, 4x4 sub-partitions) and
 * calls mc_part() for each, choosing the matching qpel/chroma function and
 * weight-table sizes; prefetches list 0 before and list 1 after. The 8x8
 * loop header and sub-partition index setup are partially elided here.
 */
1801 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1802 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1803 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1804 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1805 MpegEncContext * const s = &h->s;
1806 const int mb_xy= h->mb_xy;
1807 const int mb_type= s->current_picture.mb_type[mb_xy];
1809 assert(IS_INTER(mb_type));
1811 prefetch_motion(h, 0);
1813 if(IS_16X16(mb_type)){
1814 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1815 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1816 &weight_op[0], &weight_avg[0],
1817 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1818 }else if(IS_16X8(mb_type)){
1819 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1820 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1821 &weight_op[1], &weight_avg[1],
1822 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1823 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1824 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1825 &weight_op[1], &weight_avg[1],
1826 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1827 }else if(IS_8X16(mb_type)){
// delta = 8*mb_linesize: the second 8-row half of each 8x16 block.
1828 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1829 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1830 &weight_op[2], &weight_avg[2],
1831 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1832 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1833 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1834 &weight_op[2], &weight_avg[2],
1835 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1839 assert(IS_8X8(mb_type));
/* 8x8 mode: each quadrant carries its own sub_mb_type. */
1842 const int sub_mb_type= h->sub_mb_type[i];
1844 int x_offset= (i&1)<<2;
1845 int y_offset= (i&2)<<1;
1847 if(IS_SUB_8X8(sub_mb_type)){
1848 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1849 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1850 &weight_op[3], &weight_avg[3],
1851 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1852 }else if(IS_SUB_8X4(sub_mb_type)){
1853 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1854 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1855 &weight_op[4], &weight_avg[4],
1856 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1857 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1858 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1859 &weight_op[4], &weight_avg[4],
1860 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1861 }else if(IS_SUB_4X8(sub_mb_type)){
1862 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1863 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1864 &weight_op[5], &weight_avg[5],
1865 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1866 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1867 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1868 &weight_op[5], &weight_avg[5],
1869 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1872 assert(IS_SUB_4X4(sub_mb_type));
1874 int sub_x_offset= x_offset + 2*(j&1);
1875 int sub_y_offset= y_offset + (j&2);
1876 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1877 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1878 &weight_op[6], &weight_avg[6],
1879 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1885 prefetch_motion(h, 1);
/**
 * Builds the CAVLC level decoding lookup table: for each suffix length and
 * each LEVEL_TAB_BITS-bit bitstream prefix, precomputes the decoded level
 * ([0]) and the number of bits consumed ([1]). Codes longer than the table
 * width store prefix+100 / LEVEL_TAB_BITS+100 as escape markers for the
 * slow path. Loop-variable declarations and closings are elided here.
 */
1888 static av_cold void init_cavlc_level_tab(void){
1889 int suffix_length, mask;
1892 for(suffix_length=0; suffix_length<7; suffix_length++){
1893 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
// Number of leading zeros before the marker bit in the i-bit pattern.
1894 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1895 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
// Branchless mapping of the unsigned code to a signed level.
1897 mask= -(level_code&1);
1898 level_code= (((2+level_code)>>1) ^ mask) - mask;
1899 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1900 cavlc_level_tab[suffix_length][i][0]= level_code;
1901 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1902 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1903 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1904 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1906 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1907 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables (coeff_token,
 * total_zeros, run_before and their chroma-DC variants), using the
 * preallocated static table storage declared at the top of the file
 * (INIT_VLC_USE_NEW_STATIC).  Guarded by the 'done' flag so repeated
 * decoder instances reuse the same tables.  Finishes by building the
 * CAVLC level lookup table. */
1913 static av_cold void decode_init_vlc(void){
1914 static int done = 0;
/* chroma DC coeff_token: 4*5 symbols, fixed 256-entry static table */
1921 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1922 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1923 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1924 &chroma_dc_coeff_token_len [0], 1, 1,
1925 &chroma_dc_coeff_token_bits[0], 1, 1,
1926 INIT_VLC_USE_NEW_STATIC);
/* luma coeff_token: four context tables packed back-to-back in
 * coeff_token_vlc_tables; 'offset' walks the packed storage */
1930 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1931 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1932 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1933 &coeff_token_len [i][0], 1, 1,
1934 &coeff_token_bits[i][0], 1, 1,
1935 INIT_VLC_USE_NEW_STATIC);
1936 offset += coeff_token_vlc_tables_size[i];
1939 * This is a one time safety check to make sure that
1940 * the packed static coeff_token_vlc table sizes
1941 * were initialized correctly.
1943 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
/* chroma DC total_zeros: one table per (total_coeff-1) context */
1946 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1947 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1948 init_vlc(&chroma_dc_total_zeros_vlc[i],
1949 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1950 &chroma_dc_total_zeros_len [i][0], 1, 1,
1951 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1952 INIT_VLC_USE_NEW_STATIC);
/* luma total_zeros: 15 context tables */
1954 for(i=0; i<15; i++){
1955 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1956 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1957 init_vlc(&total_zeros_vlc[i],
1958 TOTAL_ZEROS_VLC_BITS, 16,
1959 &total_zeros_len [i][0], 1, 1,
1960 &total_zeros_bits[i][0], 1, 1,
1961 INIT_VLC_USE_NEW_STATIC);
/* run_before tables for zeros-left 1..6 */
1965 run_vlc[i].table = run_vlc_tables[i];
1966 run_vlc[i].table_allocated = run_vlc_tables_size;
1967 init_vlc(&run_vlc[i],
1969 &run_len [i][0], 1, 1,
1970 &run_bits[i][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
/* run_before for zeros-left >= 7.
 * NOTE(review): the trailing ',' here makes the next line part of a comma
 * expression rather than a separate statement; behavior is identical but
 * it was presumably meant to be ';'. */
1973 run7_vlc.table = run7_vlc_table,
1974 run7_vlc.table_allocated = run7_vlc_table_size;
1975 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1976 &run_len [6][0], 1, 1,
1977 &run_bits[6][0], 1, 1,
1978 INIT_VLC_USE_NEW_STATIC);
1980 init_cavlc_level_tab();
/* Release all per-context tables allocated by alloc_tables()/context_init().
 * slice_table is only a pointer into slice_table_base, so it is NULLed rather
 * than freed.  Per-thread buffers (top_borders, obmc_scratchpad) are freed for
 * every thread context; shared tables are freed once on the main context. */
1984 static void free_tables(H264Context *h){
1987 av_freep(&h->intra4x4_pred_mode);
1988 av_freep(&h->chroma_pred_mode_table);
1989 av_freep(&h->cbp_table);
1990 av_freep(&h->mvd_table[0]);
1991 av_freep(&h->mvd_table[1]);
1992 av_freep(&h->direct_table);
1993 av_freep(&h->non_zero_count);
1994 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base (offset applied in alloc_tables) */
1995 h->slice_table= NULL;
1997 av_freep(&h->mb2b_xy);
1998 av_freep(&h->mb2b8_xy);
/* per-thread buffers, cloned contexts included */
2000 for(i = 0; i < h->s.avctx->thread_count; i++) {
2001 hx = h->thread_context[i];
2003 av_freep(&hx->top_borders[1]);
2004 av_freep(&hx->top_borders[0]);
2005 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables (one per matrix, all 52 QP values) from
 * the PPS scaling matrices.  If both 8x8 scaling matrices are identical the
 * second table aliases the first buffer instead of being recomputed.  When a
 * non-C IDCT is in use the coefficients are stored transposed to match that
 * IDCT's input layout. */
2009 static void init_dequant8_coeff_table(H264Context *h){
2011 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2012 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2013 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2015 for(i=0; i<2; i++ ){
/* identical matrices: share buffer 0 and skip recomputation */
2016 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2017 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2021 for(q=0; q<52; q++){
/* per-QP shift (q/6); the base coefficient comes from the q%6 init table */
2022 int shift = div6[q];
2025 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2026 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2027 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables (six matrices: intra/inter Y, Cb, Cr,
 * all 52 QP values) from the PPS scaling matrices.  Matrices equal to an
 * earlier one alias that buffer instead of being recomputed.  Stored
 * transposed when a non-C IDCT is used, mirroring the 8x8 variant. */
2032 static void init_dequant4_coeff_table(H264Context *h){
2034 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2035 for(i=0; i<6; i++ ){
2036 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* dedupe: if matrix i matches an earlier matrix j, share j's buffer */
2038 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2039 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2046 for(q=0; q<52; q++){
/* +2 extra shift relative to the 8x8 table normalization */
2047 int shift = div6[q] + 2;
2050 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2051 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2052 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS: always the 4x4 set, the
 * 8x8 set only when 8x8 transforms are enabled.  For lossless (transform
 * bypass) streams, QP 0 entries are forced to the neutral value 1<<6 so
 * dequantization becomes an identity scale. */
2057 static void init_dequant_tables(H264Context *h){
2059 init_dequant4_coeff_table(h);
2060 if(h->pps.transform_8x8_mode)
2061 init_dequant8_coeff_table(h);
2062 if(h->sps.transform_bypass){
/* neutral dequant factor for the lossless QP=0 path */
2065 h->dequant4_coeff[i][0][x] = 1<<6;
2066 if(h->pps.transform_8x8_mode)
2069 h->dequant8_coeff[i][0][x] = 1<<6;
2076 * needs width/height
/* Allocate all per-stream macroblock tables that depend only on the picture
 * dimensions (prediction-mode, non-zero-count, slice, cbp, mvd and direct
 * tables), initialize the slice table to -1 ("no slice"), and precompute the
 * mb_xy -> b_xy / b8_xy index maps.  Returns 0 on success; CHECKED_ALLOCZ
 * bails out on allocation failure (cleanup handled by its fail path).
 * big_mb_num includes one extra row of padding above the picture. */
2078 static int alloc_tables(H264Context *h){
2079 MpegEncContext * const s = &h->s;
2080 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2083 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2085 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2086 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2087 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2089 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2090 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2091 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2092 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 everywhere = "macroblock belongs to no slice"; slice_table is offset
 * into the base so out-of-picture neighbors above/left are addressable */
2094 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2095 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2097 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2098 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map macroblock index to 4x4-block and 8x8-block raster indices */
2099 for(y=0; y<s->mb_height; y++){
2100 for(x=0; x<s->mb_width; x++){
2101 const int mb_xy= x + y*s->mb_stride;
2102 const int b_xy = 4*x + 4*y*h->b_stride;
2103 const int b8_xy= 2*x + 2*y*h->b8_stride;
2105 h->mb2b_xy [mb_xy]= b_xy;
2106 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is sized from linesize, so it is allocated later in frame_start() */
2110 s->obmc_scratchpad = NULL;
2112 if(!h->dequant4_coeff[0])
2113 init_dequant_tables(h);
2122 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of 'src' with the thread context 'dst' by
 * pointer assignment (no copies) — counterpart of alloc_tables() for slice
 * threads.  Per-thread state (obmc_scratchpad) is reset to NULL and the
 * prediction function table is initialized fresh for this context. */
2124 static void clone_tables(H264Context *dst, H264Context *src){
2125 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2126 dst->non_zero_count = src->non_zero_count;
2127 dst->slice_table = src->slice_table;
2128 dst->cbp_table = src->cbp_table;
2129 dst->mb2b_xy = src->mb2b_xy;
2130 dst->mb2b8_xy = src->mb2b8_xy;
2131 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2132 dst->mvd_table[0] = src->mvd_table[0];
2133 dst->mvd_table[1] = src->mvd_table[1];
2134 dst->direct_table = src->direct_table;
/* per-thread scratch buffer; (re)allocated in frame_start() */
2136 dst->s.obmc_scratchpad = NULL;
2137 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2142 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the buffers that must be private to each thread context: the two
 * top-border row caches (16 luma + 8 + 8 chroma bytes per macroblock column).
 * Returns 0 on success, -1 on allocation failure (CHECKED_ALLOCZ jumps to the
 * fail path; free_tables() releases anything already allocated). */
2144 static int context_init(H264Context *h){
2145 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2146 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2150 return -1; // free_tables will clean up for us
/* Initialization shared by all users of this context (H.264 and SVQ3):
 * copy dimensions/codec id from the AVCodecContext, set up intra prediction
 * and DSP function tables, and default both scaling-matrix sets to flat 16
 * (the spec's "no scaling list" value) until a PPS overrides them. */
2153 static av_cold void common_init(H264Context *h){
2154 MpegEncContext * const s = &h->s;
2156 s->width = s->avctx->width;
2157 s->height = s->avctx->height;
2158 s->codec_id= s->avctx->codec->id;
2160 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet", forces dequant table rebuild on first PPS */
2162 h->dequant_coeff_pps= -1;
2163 s->unrestricted_mv=1;
2164 s->decode=1; //FIXME
2166 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* flat default scaling matrices (value 16 == unity scale) */
2168 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2169 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback for the H.264 decoder: set MPV decode defaults,
 * choose the output pixel format (full-range YUVJ420P for SVQ3, YUV420P
 * otherwise), detect AVC-style ("mp4"-layout) extradata by its leading 0x01
 * byte, and seed the POC/thread bookkeeping.  Returns 0 on success. */
2172 static av_cold int decode_init(AVCodecContext *avctx){
2173 H264Context *h= avctx->priv_data;
2174 MpegEncContext * const s = &h->s;
2176 MPV_decode_defaults(s);
2181 s->out_format = FMT_H264;
2182 s->workaround_bugs= avctx->workaround_bugs;
2185 // s->decode_mb= ff_h263_decode_mb;
2186 s->quarter_sample = 1;
2189 if(avctx->codec_id == CODEC_ID_SVQ3)
2190 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2192 avctx->pix_fmt= PIX_FMT_YUV420P;
/* AVCDecoderConfigurationRecord extradata starts with version byte 1 */
2196 if(avctx->extradata_size > 0 && avctx->extradata &&
2197 *(char *)avctx->extradata == 1){
2204 h->thread_context[0] = h;
/* INT_MIN = "nothing output yet" sentinel for output POC ordering */
2205 h->outputed_poc = INT_MIN;
2206 h->prev_poc_msb= 1<<16;
/* Per-frame setup: start the MPV frame and error resilience, clear the
 * key_frame flag (IDR status is ORed in later, see comment below), compute
 * the block_offset tables used for progressive (entries 0..23) and field
 * (entries 24..47) macroblock addressing, allocate the per-thread biweight
 * scratchpad now that linesize is known, and reset the slice table for
 * MBAFF/multithreaded decoding.  Returns 0 on success, negative on error. */
2210 static int frame_start(H264Context *h){
2211 MpegEncContext * const s = &h->s;
2214 if(MPV_frame_start(s, s->avctx) < 0)
2216 ff_er_frame_start(s);
2218 * MPV_frame_start uses pict_type to derive key_frame.
2219 * This is incorrect for H.264; IDR markings must be used.
2220 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2221 * See decode_nal_units().
2223 s->current_picture_ptr->key_frame= 0;
2225 assert(s->linesize && s->uvlinesize);
/* luma offsets: 0..15 progressive, 24..39 field (doubled row stride) */
2227 for(i=0; i<16; i++){
2228 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2229 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets, Cb (16..19) and Cr (20..23) share the same values */
2232 h->block_offset[16+i]=
2233 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2234 h->block_offset[24+16+i]=
2235 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2238 /* can't be in alloc_tables because linesize isn't known there.
2239 * FIXME: redo bipred weight to not require extra buffer? */
2240 for(i = 0; i < s->avctx->thread_count; i++)
2241 if(!h->thread_context[i]->s.obmc_scratchpad)
2242 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2244 /* some macroblocks will be accessed before they're available */
2245 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2246 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2248 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2250 // We mark the current picture as non-reference after allocating it, so
2251 // that if we break out due to an error it can be released automatically
2252 // in the next MPV_frame_start().
2253 // SVQ3 as well as most other codecs have only last/next/current and thus
2254 // get released even with set reference, besides SVQ3 and others do not
2255 // mark frames as reference later "naturally".
2256 if(s->codec_id != CODEC_ID_SVQ3)
2257 s->current_picture_ptr->reference= 0;
/* field POCs unknown until the slice header is parsed */
2259 s->current_picture_ptr->field_poc[0]=
2260 s->current_picture_ptr->field_poc[1]= INT_MAX;
2261 assert(s->current_picture_ptr->long_ref==0);
/* After a macroblock is reconstructed, save its bottom row into top_borders[]
 * (to serve as the "row above" for the macroblock below) and its right column
 * into left_border[] (for the macroblock to the right).  In MBAFF mode the
 * offsets/step depend on whether this is the top or bottom MB of a pair, and
 * two border rows are maintained (see comment at original line 2307).  Chroma
 * is skipped when decoding gray-only (CODEC_FLAG_GRAY). */
2266 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2267 MpegEncContext * const s = &h->s;
2276 src_cb -= uvlinesize;
2277 src_cr -= uvlinesize;
2279 if(!simple && FRAME_MBAFF){
/* MBAFF: pick offsets for top vs. bottom macroblock of the pair */
2281 offset = MB_MBAFF ? 1 : 17;
2282 uvoffset= MB_MBAFF ? 1 : 9;
/* save the last line of the top field MB into border row 0 */
2284 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2286 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2288 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2293 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2294 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2296 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2302 top_idx = MB_MBAFF ? 0 : 1;
2304 step= MB_MBAFF ? 2 : 1;
2307 // There are two lines saved, the line above the the top macroblock of a pair,
2308 // and the line above the bottom macroblock
/* right column of luma -> left_border for the next macroblock */
2309 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2310 for(i=1; i<17 - skiplast; i++){
2311 h->left_border[offset+i*step]= src_y[15+i* linesize];
/* bottom luma row -> top_borders for the macroblock below */
2314 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2315 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2317 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2318 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2319 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2320 for(i=1; i<9 - skiplast; i++){
2321 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2322 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2324 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2325 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved top/left border pixels with the
 * current macroblock's edge pixels.  Used around intra prediction so that the
 * predictor sees pre-deblocking neighbor samples while the deblocking filter
 * later sees the filtered ones.  The XCHG macro (defined below) performs a
 * conditional swap; deblock_left/top gate whether neighbor edges exist (slice
 * boundaries when deblocking_filter==2, picture edges otherwise). */
2329 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2330 MpegEncContext * const s = &h->s;
2341 if(!simple && FRAME_MBAFF){
2343 offset = MB_MBAFF ? 1 : 17;
2344 uvoffset= MB_MBAFF ? 1 : 9;
2348 top_idx = MB_MBAFF ? 0 : 1;
2350 step= MB_MBAFF ? 2 : 1;
/* deblocking_filter==2: don't filter across slice boundaries */
2353 if(h->deblocking_filter == 2) {
2355 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2356 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2358 deblock_left = (s->mb_x > 0);
2359 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back one row/column so index 0 addresses the neighbor edge */
2362 src_y -= linesize + 1;
2363 src_cb -= uvlinesize + 1;
2364 src_cr -= uvlinesize + 1;
2366 #define XCHG(a,b,t,xchg)\
/* luma left column: conditional swap inside the MB, forced swap at row 0 */
2373 for(i = !deblock_top; i<16; i++){
2374 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2376 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
/* luma top row, plus the top-right corner of the next macroblock */
2380 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2381 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2382 if(s->mb_x+1 < s->mb_width){
2383 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2387 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2389 for(i = !deblock_top; i<8; i++){
2390 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2391 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2393 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2394 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock into the current picture: compute destination
 * pointers, handle field (MBAFF) addressing, then either copy PCM samples,
 * run intra prediction + inverse transform, or run inter motion compensation
 * (hl_motion) + residual add; finally apply the deblocking filter.
 * 'simple' is a compile-time flag (av_always_inline) that strips the
 * MBAFF/gray/SVQ3/PCM paths for the common progressive H.264 case — see the
 * hl_decode_mb_simple/complex wrappers below. */
2403 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2404 MpegEncContext * const s = &h->s;
2405 const int mb_x= s->mb_x;
2406 const int mb_y= s->mb_y;
2407 const int mb_xy= h->mb_xy;
2408 const int mb_type= s->current_picture.mb_type[mb_xy];
2409 uint8_t *dest_y, *dest_cb, *dest_cr;
2410 int linesize, uvlinesize /*dct_offset*/;
2412 int *block_offset = &h->block_offset[0];
/* lossless path: QP 0 with SPS transform_bypass set */
2413 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2414 const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2415 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2416 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2418 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2419 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2420 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2422 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2423 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use the field block_offset set (24..47),
 * and for the bottom MB of a pair rewind dest to the field's first line */
2425 if (!simple && MB_FIELD) {
2426 linesize = h->mb_linesize = s->linesize * 2;
2427 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2428 block_offset = &h->block_offset[24];
2429 if(mb_y&1){ //FIXME move out of this function?
2430 dest_y -= s->linesize*15;
2431 dest_cb-= s->uvlinesize*7;
2432 dest_cr-= s->uvlinesize*7;
/* remap ref_cache indices to field-pair numbering for the deblock filter */
2436 for(list=0; list<h->list_count; list++){
2437 if(!USES_LIST(mb_type, list))
2439 if(IS_16X16(mb_type)){
2440 int8_t *ref = &h->ref_cache[list][scan8[0]];
2441 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2443 for(i=0; i<16; i+=4){
2444 int ref = h->ref_cache[list][scan8[i]];
2446 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2452 linesize = h->mb_linesize = s->linesize;
2453 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2454 // dct_offset = s->linesize * 16;
/* I_PCM: raw samples were stored in h->mb by the entropy decoder; copy out */
2457 if (!simple && IS_INTRA_PCM(mb_type)) {
2458 for (i=0; i<16; i++) {
2459 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2461 for (i=0; i<8; i++) {
2462 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2463 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2466 if(IS_INTRA(mb_type)){
/* expose pre-deblocking neighbor pixels to the intra predictors */
2467 if(h->deblocking_filter)
2468 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2470 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2471 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2472 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2475 if(IS_INTRA4x4(mb_type)){
2476 if(simple || !s->encoding){
/* 8x8 transform: predict and add residual per 8x8 luma block */
2477 if(IS_8x8DCT(mb_type)){
2478 if(transform_bypass){
2480 idct_add = s->dsp.add_pixels8;
2482 idct_dc_add = s->dsp.h264_idct8_dc_add;
2483 idct_add = s->dsp.h264_idct8_add;
2485 for(i=0; i<16; i+=4){
2486 uint8_t * const ptr= dest_y + block_offset[i];
2487 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) lossless: fused predict+add variants */
2488 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2489 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2491 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2492 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2493 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only a DC coeff: cheaper DC-only idct */
2495 if(nnz == 1 && h->mb[i*16])
2496 idct_dc_add(ptr, h->mb + i*16, linesize);
2498 idct_add (ptr, h->mb + i*16, linesize);
/* 4x4 transform path */
2503 if(transform_bypass){
2505 idct_add = s->dsp.add_pixels4;
2507 idct_dc_add = s->dsp.h264_idct_dc_add;
2508 idct_add = s->dsp.h264_idct_add;
2510 for(i=0; i<16; i++){
2511 uint8_t * const ptr= dest_y + block_offset[i];
2512 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2514 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2515 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
/* modes that read top-right samples need a substitute when those
 * samples are unavailable: replicate the last available pixel */
2519 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2520 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2521 assert(mb_y || linesize <= block_offset[i]);
2522 if(!topright_avail){
2523 tr= ptr[3 - linesize]*0x01010101;
2524 topright= (uint8_t*) &tr;
2526 topright= ptr + 4 - linesize;
2530 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2531 nnz = h->non_zero_count_cache[ scan8[i] ];
2534 if(nnz == 1 && h->mb[i*16])
2535 idct_dc_add(ptr, h->mb + i*16, linesize);
2537 idct_add (ptr, h->mb + i*16, linesize);
/* SVQ3 uses its own idct/dequant */
2539 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: full-plane prediction, then DC hadamard dequant+idct */
2546 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2548 if(!transform_bypass)
2549 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2551 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2553 if(h->deblocking_filter)
2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation with optional (bi)weighting */
2556 hl_motion(h, dest_y, dest_cb, dest_cr,
2557 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2558 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2559 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add the luma residual (everything except the intra4x4 case, which
 * already added it block-by-block above) */
2563 if(!IS_INTRA4x4(mb_type)){
2565 if(IS_INTRA16x16(mb_type)){
2566 if(transform_bypass){
2567 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2568 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2570 for(i=0; i<16; i++){
2571 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2572 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2576 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* inter: only blocks flagged in the luma cbp carry residual */
2578 }else if(h->cbp&15){
2579 if(transform_bypass){
2580 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2581 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2582 for(i=0; i<16; i+=di){
2583 if(h->non_zero_count_cache[ scan8[i] ]){
2584 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2588 if(IS_8x8DCT(mb_type)){
2589 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2591 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
/* SVQ3 luma residual */
2596 for(i=0; i<16; i++){
2597 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2598 uint8_t * const ptr= dest_y + block_offset[i];
2599 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: only when chroma cbp bits (0x30) are set and not gray */
2605 if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2606 uint8_t *dest[2] = {dest_cb, dest_cr};
2607 if(transform_bypass){
2608 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2609 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2610 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2612 idct_add = s->dsp.add_pixels4;
2613 for(i=16; i<16+8; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2615 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* chroma DC 2x2 transform, separate QP/dequant per plane */
2619 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2620 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2622 idct_add = s->dsp.h264_idct_add;
2623 idct_dc_add = s->dsp.h264_idct_dc_add;
2624 for(i=16; i<16+8; i++){
2625 if(h->non_zero_count_cache[ scan8[i] ])
2626 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2627 else if(h->mb[i*16])
2628 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* SVQ3 chroma residual */
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2633 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2634 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* clear the coefficient buffer for the next macroblock */
2641 if(h->cbp || IS_INTRA(mb_type))
2642 s->dsp.clear_blocks(h->mb);
2644 if(h->deblocking_filter) {
2645 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2646 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2647 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2648 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
/* MBAFF needs the full-strength filter; otherwise the fast variant */
2649 if (!simple && FRAME_MBAFF) {
2650 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2652 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2658 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast path: hl_decode_mb_internal specialized with simple=1 (the inline flag
 * compiles out the MBAFF/gray/SVQ3/PCM branches). */
2660 static void hl_decode_mb_simple(H264Context *h){
2661 hl_decode_mb_internal(h, 1);
2665 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: hl_decode_mb_internal with simple=0; av_noinline keeps this
 * rarely-taken specialization out of the caller's hot code. */
2667 static void av_noinline hl_decode_mb_complex(H264Context *h){
2668 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex specialization.
 * The complex path is required for small-build configs, contexts flagged
 * is_complex (e.g. interlaced/SVQ3), I_PCM macroblocks, and the QP==0
 * lossless case. */
2671 static void hl_decode_mb(H264Context *h){
2672 MpegEncContext * const s = &h->s;
2673 const int mb_xy= h->mb_xy;
2674 const int mb_type= s->current_picture.mb_type[mb_xy];
2675 int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
/* encoder builds may skip reconstruction entirely */
2677 if(ENABLE_H264_ENCODER && !s->decode)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view in place: offset the data
 * pointers by one line for the bottom field, double the linesizes so only
 * that field's lines are addressed, restrict 'reference' to the given field
 * parity, and pick the matching field POC. */
2685 static void pic_as_field(Picture *pic, const int parity){
2687 for (i = 0; i < 4; ++i) {
2688 if (parity == PICT_BOTTOM_FIELD)
2689 pic->data[i] += pic->linesize[i];
2690 pic->reference = parity;
2691 pic->linesize[i] *= 2;
2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy 'src' into 'dest' if it is referenced with the requested parity;
 * when a single field is requested, rewrite the copy as that field and bump
 * pic_id by id_add.  Returns whether the parity matched (i.e. whether a copy
 * was made). */
2696 static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2702 if(parity != PICT_FRAME){
2703 pic_as_field(dest, parity);
2705 dest->pic_id += id_add;
/* Append entries from 'in' to the default reference list 'def', alternating
 * between pictures referenced with parity 'sel' (same parity first) and the
 * opposite parity (sel^3), per the field reference-list construction rules.
 * pic_id is set to the long-term index (is_long) or frame_num.  Returns the
 * number of entries written. */
2712 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2716 while(i[0]<len || i[1]<len){
/* advance each cursor to the next picture usable with its parity */
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort the pictures in 'src' by POC into 'sorted', taking only
 * those on one side of 'limit': POCs above it ascending when dir!=0, POCs
 * below it descending when dir==0 (used to build the two halves of a B-slice
 * default list around the current POC).  Returns the number of entries
 * emitted. */
2734 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2739 best_poc= dir ? INT_MIN : INT_MAX;
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the ascending direction */
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2745 sorted[out_i]= src[i];
/* sentinel unchanged -> no candidate left on this side of limit */
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2750 limit= sorted[out_i++]->poc - dir;
2756 * fills the default_ref_list.
/* Build the default (pre-reordering) reference lists.  For B slices: sort
 * short-term references around the current POC (past then future for list 0,
 * future then past for list 1), append long-term references, and if both
 * lists came out identical swap the first two entries of list 1 as the spec
 * requires.  For P slices: short-term by frame_num order, then long-term.
 * Unused tail entries are zeroed. */
2758 static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2762 if(h->slice_type_nos==FF_B_TYPE){
2763 Picture *sorted[32];
/* current POC: the coded field's POC when decoding a field */
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2770 cur_poc= s->current_picture_ptr->poc;
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* identical lists are not allowed: swap the first two entries of list 1 */
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P/SP slices: single list, short-term then long-term */
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2798 for (i=0; i<h->ref_count[0]; i++) {
2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2801 if(h->slice_type_nos==FF_B_TYPE){
2802 for (i=0; i<h->ref_count[1]; i++) {
2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2810 static void print_short_term(H264Context *h);
2811 static void print_long_term(H264Context *h);
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
/* Split a field/frame pic_num into its picture number and structure: in field
 * decoding the low bit selects same (1) or opposite (0) field parity relative
 * to the current picture; the remaining bits are the picture number returned
 * to the caller (see doc comment above). */
2823 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2826 *structure = s->picture_structure;
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
/* Parse ref_pic_list_reordering() from the slice header and apply it: start
 * from the default lists, then for each reordering command locate the named
 * short-term (idc 0/1, relative pic_num) or long-term (idc 2, index) picture,
 * shift the list and insert it at the current position.  Afterwards replace
 * any hole in the lists with the current picture (workaround, see FIXME).
 * Returns 0 on success, -1 on bitstream errors. */
2837 static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
2839 int list, index, pic_structure;
2841 print_short_term(h);
2844 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering edits it in place */
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2847 if(get_bits1(&s->gb)){
2848 int pred= h->curr_pic_num;
2850 for(index=0; ; index++){
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2852 unsigned int pic_id;
2854 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2856 if(reordering_of_pic_nums_idc==3)
2859 if(index >= h->ref_count[list]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
/* short-term: pic_num predicted from the previous one +- diff */
2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2869 if(abs_diff_pic_num > h->max_pic_num){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
2876 pred &= h->max_pic_num - 1;
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search newest-first among short-term references */
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
2882 assert(ref->reference);
2883 assert(!ref->long_ref);
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
/* idc 2: long-term reference selected by index */
2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
2904 if(ref && (ref->reference & pic_structure)){
2905 ref->pic_id= pic_id;
2906 assert(ref->long_ref);
2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* find where the picture already sits (if at all), then shift the
 * intervening entries down and insert it at 'index' */
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2924 h->ref_list[list][index]= *ref;
2926 pic_as_field(&h->ref_list[list][index], pic_structure);
2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill holes so every list entry is a decodable picture */
2936 for(list=0; list<h->list_count; list++){
2937 for(index= 0; index < h->ref_count[list]; index++){
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries from each frame in
 * the reference lists: entries 16+2*i / 16+2*i+1 become the top and bottom
 * field views of frame i (halved height via doubled linesize, field POC and
 * parity set), and the explicit/implicit prediction weights of the frame are
 * replicated for both field entries. */
2948 static void fill_mbaff_ref_list(H264Context *h){
2950 for(list=0; list<2; list++){ //FIXME try list_count
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
/* field[0] = top field of the frame */
2956 field[0].linesize[j] <<= 1;
2957 field[0].reference = PICT_TOP_FIELD;
2958 field[0].poc= field[0].field_poc[0];
/* field[1] = bottom field: same as top, offset by one frame line */
2959 field[1] = field[0];
2961 field[1].data[j] += frame->linesize[j];
2962 field[1].reference = PICT_BOTTOM_FIELD;
2963 field[1].poc= field[1].field_poc[1];
/* duplicate explicit weights/offsets for both field entries */
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* duplicate implicit bipred weights along both list dimensions */
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() from the slice header: log2 weight denominators,
 * then per reference and per list an optional explicit luma and chroma
 * weight/offset pair (defaults 1<<denom and 0 when the flag is absent).
 * use_weight/use_weight_chroma are set as soon as any non-default value is
 * seen.  List 1 is parsed only for B slices.  Returns 0. */
2981 static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2984 int luma_def, chroma_def;
2987 h->use_weight_chroma= 0;
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* neutral weights: value == 1 << denom, offset == 0 */
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a list-1 weight table */
3029 if(h->slice_type_nos != FF_B_TYPE) break;
3031 h->use_weight= h->use_weight || h->use_weight_chroma;
// Computes the implicit (POC-distance based) bi-prediction weight table,
// per H.264 spec 8.4.2.3.2: weights derive from the clipped temporal
// distances tb/td between the current picture and each ref pair.
// NOTE(review): listing is elided — intermediate source lines are missing.
3035 static void implicit_weight_table(H264Context *h){
3036     MpegEncContext * const s = &h->s;
3038     int cur_poc = s->current_picture_ptr->poc;
// Shortcut: a single ref on each side, temporally equidistant from the
// current picture, degenerates to plain (unweighted) averaging.
3040     if(   h->ref_count[0] == 1 && h->ref_count[1] == 1
3041        && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3043         h->use_weight_chroma= 0;
// use_weight_chroma==2 marks "implicit mode" for the rest of the decoder.
3048     h->use_weight_chroma= 2;
3049     h->luma_log2_weight_denom= 5;
3050     h->chroma_log2_weight_denom= 5;
3052     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053         int poc0 = h->ref_list[0][ref0].poc;
3054         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3055             int poc1 = h->ref_list[1][ref1].poc;
// td/tb clipping to [-128,127] follows the spec's DiffPicOrderCnt clip.
3056             int td = av_clip(poc1 - poc0, -128, 127);
3058                 int tb = av_clip(cur_poc - poc0, -128, 127);
3059                 int tx = (16384 + (FFABS(td) >> 1)) / td;
3060                 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range scale factors fall back to the neutral 32/32 split.
3061                 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062                     h->implicit_weight[ref0][ref1] = 32;
3064                     h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3066                 h->implicit_weight[ref0][ref1] = 32;
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * ANDed with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes) zero if one of the fields remains in
// See the doc comment above: clears refmask bits from pic->reference; if the
// picture is still queued for output (present in delayed_pic[]) it is kept
// alive with the DELAYED_PIC_REF sentinel instead of being fully released.
// NOTE(review): listing is elided — return statements are not visible here.
3082 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3084     if (pic->reference &= refmask) {
3087         for(i = 0; h->delayed_pic[i]; i++)
3088             if(pic == h->delayed_pic[i]){
3089                 pic->reference=DELAYED_PIC_REF;
3097 * instantaneous decoder refresh.
// Handles an IDR (instantaneous decoder refresh): drops every long-term and
// short-term reference and resets the frame_num prediction state, per the
// spec's requirement that an IDR invalidates all prior references.
// NOTE(review): listing is elided — intermediate source lines are missing.
3099 static void idr(H264Context *h){
// Release all 16 possible long-term reference slots.
3102     for(i=0; i<16; i++){
3103         remove_long(h, i, 0);
3105     assert(h->long_ref_count==0);
// Release all short-term references (refmask 0 => fully unreference).
3107     for(i=0; i<h->short_ref_count; i++){
3108         unreference_pic(h, h->short_ref[i], 0);
3109         h->short_ref[i]= NULL;
3111     h->short_ref_count=0;
3112     h->prev_frame_num= 0;
3113     h->prev_frame_num_offset= 0;
3118 /* forget old pics after a seek */
// AVCodec.flush callback: clears the delayed-output queue, the output POC
// tracker, the current picture's reference marking and the field-pairing
// state, then delegates the MPEG-level flush to ff_mpeg_flush().
3119 static void flush_dpb(AVCodecContext *avctx){
3120     H264Context *h= avctx->priv_data;
3122     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3123         if(h->delayed_pic[i])
3124             h->delayed_pic[i]->reference= 0;
3125         h->delayed_pic[i]= NULL;
// INT_MIN = "nothing output yet"; next output POC comparison always passes.
3127     h->outputed_poc= INT_MIN;
3129     if(h->s.current_picture_ptr)
3130         h->s.current_picture_ptr->reference= 0;
3131     h->s.first_field= 0;
3132     ff_mpeg_flush(avctx);
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
// See the doc comment above: linear search of h->short_ref[] for frame_num;
// on a hit *idx receives the list position (the return is in elided lines).
3143 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3144     MpegEncContext * const s = &h->s;
3147     for(i=0; i<h->short_ref_count; i++){
3148         Picture *pic= h->short_ref[i];
3149         if(s->avctx->debug&FF_DEBUG_MMCO)
3150             av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3151         if(pic->frame_num == frame_num) {
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
// See the doc comment above: removes entry i from short_ref and compacts
// the remaining pointers down with memmove (count already decremented, so
// (short_ref_count - i) is the number of entries after slot i).
3165 static void remove_short_at_index(H264Context *h, int i){
3166     assert(i >= 0 && i < h->short_ref_count);
3167     h->short_ref[i]= NULL;
3168     if (--h->short_ref_count)
3169         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3174 * @return the removed picture or NULL if an error occurs
// Removes the short-term reference with the given frame_num (if found) after
// unreferencing it with ref_mask; the entry leaves the list only when
// unreference_pic reports the picture is entirely unreferenced.
// NOTE(review): listing is elided — the return statement is not visible here.
3176 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3177     MpegEncContext * const s = &h->s;
3181     if(s->avctx->debug&FF_DEBUG_MMCO)
3182         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3184     pic = find_short(h, frame_num, &i);
3186         if(unreference_pic(h, pic, ref_mask))
3187             remove_short_at_index(h, i);
3194 * Remove a picture from the long term reference list by its index in
3196 * @return the removed picture or NULL if an error occurs
// Removes long-term reference slot i after unreferencing with ref_mask;
// the slot is cleared (and long_ref_count decremented) only when the picture
// becomes entirely unreferenced.
// NOTE(review): listing is elided — the return statement is not visible here.
3198 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3201     pic= h->long_ref[i];
3203         if(unreference_pic(h, pic, ref_mask)){
3204             assert(h->long_ref[i]->long_ref == 1);
3205             h->long_ref[i]->long_ref= 0;
3206             h->long_ref[i]= NULL;
3207             h->long_ref_count--;
3215 * print short term list
// Debug helper: dumps the short-term reference list (index, frame_num, poc,
// data pointer) when FF_DEBUG_MMCO is enabled; no-op otherwise.
3217 static void print_short_term(H264Context *h) {
3219     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3220         av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3221         for(i=0; i<h->short_ref_count; i++){
3222             Picture *pic= h->short_ref[i];
3223             av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3229 * print long term list
// Debug helper: dumps all 16 long-term reference slots when FF_DEBUG_MMCO is
// enabled; no-op otherwise. (An elided line presumably skips NULL slots.)
3231 static void print_long_term(H264Context *h) {
3233     if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234         av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3235         for(i = 0; i < 16; i++){
3236             Picture *pic= h->long_ref[i];
3238                 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3245 * Executes the reference picture marking (memory management control operations).
// See the doc comment above: applies the slice's memory management control
// operations (MMCO, spec 8.2.5) to the short/long-term reference lists, then
// performs the default sliding-window marking of the current picture and
// enforces the SPS reference-count limit.
// NOTE(review): listing is elided — intermediate source lines are missing.
3247 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3248     MpegEncContext * const s = &h->s;
3250     int current_ref_assigned=0;
3253     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3254         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3256     for(i=0; i<mmco_count; i++){
3257         int structure, frame_num;
3258         if(s->avctx->debug&FF_DEBUG_MMCO)
3259             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Both SHORT2UNUSED and SHORT2LONG first need to locate the short ref.
3261         if(   mmco[i].opcode == MMCO_SHORT2UNUSED
3262            || mmco[i].opcode == MMCO_SHORT2LONG){
3263             frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3264             pic = find_short(h, frame_num, &j);
// Missing short ref is only an error when a same-numbered long ref cannot
// stand in for the SHORT2LONG case.
3266                 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3267                    || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3268                     av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3273         switch(mmco[i].opcode){
3274         case MMCO_SHORT2UNUSED:
3275             if(s->avctx->debug&FF_DEBUG_MMCO)
3276                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// structure ^ PICT_FRAME keeps the *other* field referenced, if any.
3277             remove_short(h, frame_num, structure ^ PICT_FRAME);
3279         case MMCO_SHORT2LONG:
// Evict any different picture occupying the target long-term slot first.
3280             if (h->long_ref[mmco[i].long_arg] != pic)
3281                 remove_long(h, mmco[i].long_arg, 0);
3283                 remove_short_at_index(h, j);
3284                 h->long_ref[ mmco[i].long_arg ]= pic;
3285                 if (h->long_ref[ mmco[i].long_arg ]){
3286                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3287                     h->long_ref_count++;
3290         case MMCO_LONG2UNUSED:
3291             j = pic_num_extract(h, mmco[i].long_arg, &structure);
3292             pic = h->long_ref[j];
3294                 remove_long(h, j, structure ^ PICT_FRAME);
3295             } else if(s->avctx->debug&FF_DEBUG_MMCO)
3296                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3299             // Comment below left from previous code as it is an interesting note.
3300             /* First field in pair is in short term list or
3301              * at a different long term index.
3302              * This is not allowed; see 7.4.3.3, notes 2 and 3.
3303              * Report the problem and keep the pair where it is,
3304              * and mark this field valid.
// (MMCO_LONG, per the opcodes tested above) assign the current picture to a
// long-term slot, evicting a previous occupant if different.
3307             if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3308                 remove_long(h, mmco[i].long_arg, 0);
3310                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3311                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312                 h->long_ref_count++;
3315             s->current_picture_ptr->reference |= s->picture_structure;
3316             current_ref_assigned=1;
3318         case MMCO_SET_MAX_LONG:
3319             assert(mmco[i].long_arg <= 16);
3320             // just remove the long term which index is greater than new max
3321             for(j = mmco[i].long_arg; j<16; j++){
3322                 remove_long(h, j, 0);
// (MMCO_RESET, per elision) drop every reference and reset POC/frame_num.
3326             while(h->short_ref_count){
3327                 remove_short(h, h->short_ref[0]->frame_num, 0);
3329             for(j = 0; j < 16; j++) {
3330                 remove_long(h, j, 0);
3332             s->current_picture_ptr->poc=
3333             s->current_picture_ptr->field_poc[0]=
3334             s->current_picture_ptr->field_poc[1]=
3338             s->current_picture_ptr->frame_num= 0;
// Default sliding-window marking: the current picture becomes a short-term
// reference unless an MMCO_LONG already claimed it above.
3344     if (!current_ref_assigned) {
3345         /* Second field of complementary field pair; the first field of
3346          * which is already referenced. If short referenced, it
3347          * should be first entry in short_ref. If not, it must exist
3348          * in long_ref; trying to put it on the short list here is an
3349          * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3351         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3352             /* Just mark the second field valid */
3353             s->current_picture_ptr->reference = PICT_FRAME;
3354         } else if (s->current_picture_ptr->long_ref) {
3355             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3356                    "assignment for second field "
3357                    "in complementary field pair "
3358                    "(first field is long term)\n");
3360             pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3362                 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// Insert the current picture at the head of the short-term list.
3365             if(h->short_ref_count)
3366                 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3368             h->short_ref[0]= s->current_picture_ptr;
3369             h->short_ref_count++;
3370             s->current_picture_ptr->reference |= s->picture_structure;
3374     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3376         /* We have too many reference frames, probably due to corrupted
3377          * stream. Need to discard one frame. Prevents overrun of the
3378          * short_ref and long_ref buffers.
3380         av_log(h->s.avctx, AV_LOG_ERROR,
3381                "number of reference frames exceeds max (probably "
3382                "corrupt input), discarding one\n");
3384         if (h->long_ref_count && !h->short_ref_count) {
3385             for (i = 0; i < 16; ++i)
3390             remove_long(h, i, 0);
// Otherwise drop the oldest short-term reference (last list entry).
3392             pic = h->short_ref[h->short_ref_count - 1];
3393             remove_short(h, pic->frame_num, 0);
3397     print_short_term(h);
// Parses dec_ref_pic_marking() from the slice header into h->mmco[]: IDR
// slices get a synthetic MMCO_LONG, otherwise explicit MMCO ops are read
// when adaptive_ref_pic_marking_mode_flag is set, or a sliding-window
// SHORT2UNUSED is synthesized when the reference buffer is full.
// NOTE(review): listing is elided — intermediate source lines are missing.
3402 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3403     MpegEncContext * const s = &h->s;
3407     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag: 1 bit -> broken_link is 0 or -1 here.
3408         s->broken_link= get_bits1(gb) -1;
3410             h->mmco[0].opcode= MMCO_LONG;
3411             h->mmco[0].long_arg= 0;
3415         if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3416             for(i= 0; i<MAX_MMCO_COUNT; i++) {
3417                 MMCOOpcode opcode= get_ue_golomb_31(gb);
3419                 h->mmco[i].opcode= opcode;
3420                 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1, wrapped into the valid pic_num range.
3421                     h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3422 /*                    if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3423                         av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3427                 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3428                     unsigned int long_arg= get_ue_golomb_31(gb);
// Field pictures use doubled pic nums, so LONG2UNUSED may go up to 31.
3429                     if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3430                         av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3433                     h->mmco[i].long_arg= long_arg;
3436                 if(opcode > (unsigned)MMCO_LONG){
3437                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3440                 if(opcode == MMCO_END)
// Implicit sliding window: buffer full -> drop the oldest short-term ref,
// unless this is the (still referenced) second field of a pair.
3445             assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3447             if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3448                     !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3449                 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3450                 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3452                 if (FIELD_PICTURE) {
3453                     h->mmco[0].short_pic_num *= 2;
3454                     h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3455                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
// Computes the picture order count (POC) for the current picture according
// to the SPS poc_type (0: lsb/msb wrapping, 1: expected-delta cycles,
// 2: derived from frame_num), then stores the per-field and frame POCs.
// See H.264 spec 8.2.1.
// NOTE(review): listing is elided — intermediate source lines are missing.
3465 static int init_poc(H264Context *h){
3466     MpegEncContext * const s = &h->s;
3467     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3469     Picture *cur = s->current_picture_ptr;
// frame_num wrapped -> advance the offset by one full frame_num period.
3471     h->frame_num_offset= h->prev_frame_num_offset;
3472     if(h->frame_num < h->prev_frame_num)
3473         h->frame_num_offset += max_frame_num;
3475     if(h->sps.poc_type==0){
3476         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Standard poc_msb wrap detection (spec 8.2.1.1).
3478         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3479             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3480         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3481             h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3483             h->poc_msb = h->prev_poc_msb;
3484 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3486         field_poc[1] = h->poc_msb + h->poc_lsb;
3487         if(s->picture_structure == PICT_FRAME)
3488             field_poc[1] += h->delta_poc_bottom;
3489     }else if(h->sps.poc_type==1){
3490         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3493         if(h->sps.poc_cycle_length != 0)
3494             abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures are counted one step behind (spec 8.2.1.2).
3498         if(h->nal_ref_idc==0 && abs_frame_num > 0)
3501         expected_delta_per_poc_cycle = 0;
3502         for(i=0; i < h->sps.poc_cycle_length; i++)
3503             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3505         if(abs_frame_num > 0){
3506             int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3507             int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3509             expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3510             for(i = 0; i <= frame_num_in_poc_cycle; i++)
3511                 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3515         if(h->nal_ref_idc == 0)
3516             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3518         field_poc[0] = expectedpoc + h->delta_poc[0];
3519         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3521         if(s->picture_structure == PICT_FRAME)
3522             field_poc[1] += h->delta_poc[1];
// poc_type==2: POC follows decoding order, derived from frame_num.
3524         int poc= 2*(h->frame_num_offset + h->frame_num);
// Only store the POC for fields actually present in this picture.
3533     if(s->picture_structure != PICT_BOTTOM_FIELD)
3534         s->current_picture_ptr->field_poc[0]= field_poc[0];
3535     if(s->picture_structure != PICT_TOP_FIELD)
3536         s->current_picture_ptr->field_poc[1]= field_poc[1];
3537     cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3544 * initialize scan tables
// See the doc comment above: sets up the per-context zigzag/field scan
// tables. When the DSP uses the C IDCT the canonical scan order is copied
// verbatim; otherwise the T() permutations remap the scans to match the
// coefficient layout the optimized IDCT expects. The *_q0 pointers select
// the untransformed tables for lossless (transform-bypass) blocks.
3546 static void init_scan_tables(H264Context *h){
3547     MpegEncContext * const s = &h->s;
3549     if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3550         memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3551         memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
3553         for(i=0; i<16; i++){
// Swap the 2-bit row/column halves of each 4x4 scan position.
3554 #define T(x) (x>>2) | ((x<<2) & 0xF)
3555             h->zigzag_scan[i] = T(zigzag_scan[i]);
3556             h-> field_scan[i] = T( field_scan[i]);
3560     if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3561         memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
3562         memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3563         memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
3564         memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
3566         for(i=0; i<64; i++){
// Swap the 3-bit row/column halves of each 8x8 scan position.
3567 #define T(x) (x>>3) | ((x&7)<<3)
3568             h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
3569             h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3570             h->field_scan8x8[i]        = T(field_scan8x8[i]);
3571             h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
3575     if(h->sps.transform_bypass){ //FIXME same ugly
// Bypass blocks skip the IDCT, so they must use the unpermuted scans.
3576         h->zigzag_scan_q0          = zigzag_scan;
3577         h->zigzag_scan8x8_q0       = zigzag_scan8x8;
3578         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3579         h->field_scan_q0           = field_scan;
3580         h->field_scan8x8_q0       = field_scan8x8;
3581         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
3583         h->zigzag_scan_q0          = h->zigzag_scan;
3584         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
3585         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3586         h->field_scan_q0           = h->field_scan;
3587         h->field_scan8x8_q0        = h->field_scan8x8;
3588         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
3593 * Replicates H264 "master" context to thread contexts.
// See the doc comment above: copies the per-frame decoding state from the
// master context into a worker-thread context so slice threads can decode
// independently. Shallow copies only — pointers (current_picture_ptr,
// ref lists) are shared, not duplicated.
3595 static void clone_slice(H264Context *dst, H264Context *src)
3597     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
3598     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
3599     dst->s.current_picture      = src->s.current_picture;
3600     dst->s.linesize             = src->s.linesize;
3601     dst->s.uvlinesize           = src->s.uvlinesize;
3602     dst->s.first_field          = src->s.first_field;
3604     dst->prev_poc_msb           = src->prev_poc_msb;
3605     dst->prev_poc_lsb           = src->prev_poc_lsb;
3606     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
3607     dst->prev_frame_num         = src->prev_frame_num;
3608     dst->short_ref_count        = src->short_ref_count;
3610     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
3611     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
3612     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3613     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
3615     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
3616     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
3620 * decodes a slice header.
3621 * This will also call MPV_common_init() and frame_start() as needed.
3623 * @param h h264context
3624 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3626 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
// See the doc comment above: parses one slice header end-to-end — slice
// type, PPS/SPS activation, geometry (re)initialization, frame/field
// pairing, POC, reference list construction/reordering, weighted
// prediction, ref pic marking, QP and deblocking parameters — and starts a
// new frame when this is the first slice.
// NOTE(review): listing is heavily elided — many intermediate source lines
// (declarations, error returns, closing braces) are missing from this view.
3628 static int decode_slice_header(H264Context *h, H264Context *h0){
3629     MpegEncContext * const s = &h->s;
3630     MpegEncContext * const s0 = &h0->s;
3631     unsigned int first_mb_in_slice;
3632     unsigned int pps_id;
3633     int num_ref_idx_active_override_flag;
3634     unsigned int slice_type, tmp, i, j;
3635     int default_ref_list_done = 0;
3636     int last_pic_structure;
// A picture is droppable when it is not used as a reference.
3638     s->dropable= h->nal_ref_idc == 0;
3640     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3641         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3642         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3644         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3645         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3648     first_mb_in_slice= get_ue_golomb(&s->gb);
// First MB of a new picture: reset slice counter / current picture.
3650     if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3651         h0->current_slice = 0;
3652         if (!s0->first_field)
3653             s->current_picture_ptr= NULL;
3656     slice_type= get_ue_golomb_31(&s->gb);
3658         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "same type for all slices of the picture".
3663         h->slice_type_fixed=1;
3665         h->slice_type_fixed=0;
3667     slice_type= golomb_to_pict_type[ slice_type ];
// I slices (and repeats of the previous slice type) can reuse the default
// reference list built once per picture.
3668     if (slice_type == FF_I_TYPE
3669         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3670         default_ref_list_done = 1;
3672     h->slice_type= slice_type;
3673     h->slice_type_nos= slice_type & 3;
3675     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3676     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3677         av_log(h->s.avctx, AV_LOG_ERROR,
3678                "B picture before any references, skipping\n");
// Activate the referenced PPS and its SPS.
3682     pps_id= get_ue_golomb(&s->gb);
3683     if(pps_id>=MAX_PPS_COUNT){
3684         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3687     if(!h0->pps_buffers[pps_id]) {
3688         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3691     h->pps= *h0->pps_buffers[pps_id];
3693     if(!h0->sps_buffers[h->pps.sps_id]) {
3694         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3697     h->sps = *h0->sps_buffers[h->pps.sps_id];
// Dequant tables depend on the PPS; rebuild only on the master context.
3699     if(h == h0 && h->dequant_coeff_pps != pps_id){
3700         h->dequant_coeff_pps = pps_id;
3701         init_dequant_tables(h);
3704     s->mb_width= h->sps.mb_width;
3705     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3707     h->b_stride=  s->mb_width*4;
3708     h->b8_stride= s->mb_width*2;
// Apply SPS cropping to derive the displayed dimensions.
3710     s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3711     if(h->sps.frame_mbs_only_flag)
3712         s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3714         s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3716     if (s->context_initialized
3717         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
3719             return -1;   // width / height changed during parallelized decoding
3721         flush_dpb(s->avctx);
3724     if (!s->context_initialized) {
3726             return -1;  // we cant (re-)initialize context during parallel decoding
3727         if (MPV_common_init(s) < 0)
3731         init_scan_tables(h);
// Set up per-thread H264Contexts sharing the master MpegEncContext state.
3734         for(i = 1; i < s->avctx->thread_count; i++) {
3736             c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3737             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3738             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3741             init_scan_tables(c);
3745         for(i = 0; i < s->avctx->thread_count; i++)
3746             if(context_init(h->thread_context[i]) < 0)
3749         s->avctx->width = s->width;
3750         s->avctx->height = s->height;
3751         s->avctx->sample_aspect_ratio= h->sps.sar;
3752         if(!s->avctx->sample_aspect_ratio.den)
3753             s->avctx->sample_aspect_ratio.den = 1;
3755         if(h->sps.timing_info_present_flag){
3756             s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Work around old x264 (< build 44) writing half the correct time_scale.
3757             if(h->x264_build > 0 && h->x264_build < 44)
3758                 s->avctx->time_base.den *= 2;
3759             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3760                       s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3764     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3767     h->mb_aff_frame = 0;
3768     last_pic_structure = s0->picture_structure;
3769     if(h->sps.frame_mbs_only_flag){
3770         s->picture_structure= PICT_FRAME;
3772         if(get_bits1(&s->gb)) { //field_pic_flag
3773             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3775             s->picture_structure= PICT_FRAME;
3776             h->mb_aff_frame = h->sps.mb_aff;
3779     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3781     if(h0->current_slice == 0){
// Conceal frame_num gaps by synthesizing the missing reference frames.
3782         while(h->frame_num !=  h->prev_frame_num &&
3783               h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3784             av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3786             h->prev_frame_num++;
3787             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3788             s->current_picture_ptr->frame_num= h->prev_frame_num;
3789             execute_ref_pic_marking(h, NULL, 0);
3792         /* See if we have a decoded first field looking for a pair... */
3793         if (s0->first_field) {
3794             assert(s0->current_picture_ptr);
3795             assert(s0->current_picture_ptr->data[0]);
3796             assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3798             /* figure out if we have a complementary field pair */
3799             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3801                  * Previous field is unmatched. Don't display it, but let it
3802                  * remain for reference if marked as such.
3804                 s0->current_picture_ptr = NULL;
3805                 s0->first_field = FIELD_PICTURE;
3808                 if (h->nal_ref_idc &&
3809                         s0->current_picture_ptr->reference &&
3810                         s0->current_picture_ptr->frame_num != h->frame_num) {
3812                      * This and previous field were reference, but had
3813                      * different frame_nums. Consider this field first in
3814                      * pair. Throw away previous field except for reference
3817                     s0->first_field = 1;
3818                     s0->current_picture_ptr = NULL;
3821                     /* Second field in complementary pair */
3822                     s0->first_field = 0;
3827             /* Frame or first field in a potentially complementary pair */
3828             assert(!s0->current_picture_ptr);
3829             s0->first_field = FIELD_PICTURE;
3832         if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3833             s0->first_field = 0;
3840     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3842     assert(s->mb_num == s->mb_width * s->mb_height);
3843     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3844        first_mb_in_slice                    >= s->mb_num){
3845         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3848     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3849     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3850     if (s->picture_structure == PICT_BOTTOM_FIELD)
3851         s->resync_mb_y = s->mb_y = s->mb_y + 1;
3852     assert(s->mb_y < s->mb_height);
// Field pictures use doubled pic numbers (spec 8.2.4.1).
3854     if(s->picture_structure==PICT_FRAME){
3855         h->curr_pic_num=   h->frame_num;
3856         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3858         h->curr_pic_num= 2*h->frame_num + 1;
3859         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3862     if(h->nal_unit_type == NAL_IDR_SLICE){
3863         get_ue_golomb(&s->gb); /* idr_pic_id */
3866     if(h->sps.poc_type==0){
3867         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3869         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3870             h->delta_poc_bottom= get_se_golomb(&s->gb);
3874     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3875         h->delta_poc[0]= get_se_golomb(&s->gb);
3877         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3878             h->delta_poc[1]= get_se_golomb(&s->gb);
3883     if(h->pps.redundant_pic_cnt_present){
3884         h->redundant_pic_count= get_ue_golomb(&s->gb);
3887     //set defaults, might be overridden a few lines later
3888     h->ref_count[0]= h->pps.ref_count[0];
3889     h->ref_count[1]= h->pps.ref_count[1];
3891     if(h->slice_type_nos != FF_I_TYPE){
3892         if(h->slice_type_nos == FF_B_TYPE){
3893             h->direct_spatial_mv_pred= get_bits1(&s->gb);
3895         num_ref_idx_active_override_flag= get_bits1(&s->gb);
3897         if(num_ref_idx_active_override_flag){
3898             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3899             if(h->slice_type_nos==FF_B_TYPE)
3900                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned trick: catches both ref_count 0 and > 32.
3902             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3903                 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3904                 h->ref_count[0]= h->ref_count[1]= 1;
3908         if(h->slice_type_nos == FF_B_TYPE)
3915     if(!default_ref_list_done){
3916         fill_default_ref_list(h);
3919     if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// Wire the first entries of each list into the MPEG-level last/next pics.
3922     if(h->slice_type_nos!=FF_I_TYPE){
3923         s->last_picture_ptr= &h->ref_list[0][0];
3924         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3926     if(h->slice_type_nos==FF_B_TYPE){
3927         s->next_picture_ptr= &h->ref_list[1][0];
3928         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3931     if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
3932        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3933         pred_weight_table(h);
3934     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3935         implicit_weight_table(h);
3940         decode_ref_pic_marking(h0, &s->gb);
3943         fill_mbaff_ref_list(h);
3945     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3946         direct_dist_scale_factor(h);
3947     direct_ref_list_init(h);
3949     if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3950         tmp = get_ue_golomb_31(&s->gb);
3952             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3955         h->cabac_init_idc= tmp;
3958     h->last_qscale_diff = 0;
3959     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3961         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3965     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3966     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3967     //FIXME qscale / qp ... stuff
3968     if(h->slice_type == FF_SP_TYPE){
3969         get_bits1(&s->gb); /* sp_for_switch_flag */
3971     if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3972         get_se_golomb(&s->gb); /* slice_qs_delta */
3975     h->deblocking_filter = 1;
3976     h->slice_alpha_c0_offset = 0;
3977     h->slice_beta_offset = 0;
3978     if( h->pps.deblocking_filter_parameters_present ) {
3979         tmp= get_ue_golomb_31(&s->gb);
3981             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
// Bitstream: 0=on, 1=off; internal: 1=on, 0=off — swap 0<->1.
3984         h->deblocking_filter= tmp;
3985         if(h->deblocking_filter < 2)
3986             h->deblocking_filter^= 1; // 1<->0
3988         if( h->deblocking_filter ) {
3989             h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3990             h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3994     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
3995        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3996        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
3997        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3998         h->deblocking_filter= 0;
4000     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4001         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4002             /* Cheat slightly for speed:
4003                Do not bother to deblock across slices. */
4004             h->deblocking_filter = 2;
4006             h0->max_contexts = 1;
4007             if(!h0->single_decode_warning) {
4008                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4009                 h0->single_decode_warning = 1;
4012                 return 1; // deblocking switched inside frame
// NOTE(review): the `?` below is a placeholder; in upstream FFmpeg this FMO
// path is compiled out (#if 0) — confirm it is inactive in the full file.
4017     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4018         slice_group_change_cycle= get_bits(&s->gb, ?);
4021     h0->last_slice_type = slice_type;
4022     h->slice_num = ++h0->current_slice;
4023     if(h->slice_num >= MAX_SLICES){
4024         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// Build the ref-index -> frame-number map used by the loop filter; entries
// encode 4*frame_num plus the 2-bit reference (field) flags.
4028         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4032             ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4033                           +(h->ref_list[j][i].reference&3);
// Indices 16..47 cover the MBAFF per-field entries added above 16.
4036         for(i=16; i<48; i++)
4037             ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4038                           +(h->ref_list[j][i].reference&3);
4041     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4042     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4044     s->avctx->refs= h->sps.ref_frame_count;
4046     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4047         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4049                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4051                av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4052                pps_id, h->frame_num,
4053                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4054                h->ref_count[0], h->ref_count[1],
4056                h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4058                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4059                h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
// Reads a CAVLC level_prefix: counts leading zero bits before the first 1
// using the bitstream-reader cache (the count derives from av_log2 of the
// cached 32 bits; the return is in elided lines).
4069 static inline int get_level_prefix(GetBitContext *gb){
4073     OPEN_READER(re, gb);
4074     UPDATE_CACHE(re, gb);
4075     buf=GET_CACHE(re, gb);
// log = position of the leading 1 bit, counted from the MSB side.
4077     log= 32 - av_log2(buf);
4079     print_bin(buf>>(32-log), log);
4080     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4083     LAST_SKIP_BITS(re, gb, log);
4084     CLOSE_READER(re, gb);
// Returns nonzero when the 8x8 transform may be used for the current MB:
// all four sub_mb_types are checked at once via a packed 64-bit mask.
// With direct_8x8_inference, DIRECT sub-blocks don't forbid the 8x8 DCT.
4089 static inline int get_dct8x8_allowed(H264Context *h){
4090     if(h->sps.direct_8x8_inference_flag)
4091         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
4093         return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
/**
4097 * decodes a residual block.
4098 * @param n block index
4099 * @param scantable scantable
4100 * @param max_coeff number of coefficients in the block
4101 * @return <0 if an error occurred
 *
 * CAVLC residual decode: coeff_token -> trailing ones -> remaining levels ->
 * total_zeros -> run_before, scattering coefficients along the scantable.
 * qmul==NULL leaves raw levels (DC path); otherwise levels are dequantized.
 * NOTE(review): sampled listing — else-branches, some braces and early
 * returns are elided between the numbered rows.
 */
4103 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4104 MpegEncContext * const s = &h->s;
// maps predicted nnz (0..16) to one of the four coeff_token VLC tables
4105 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4107 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4109 //FIXME put trailing_onex into the context
// --- coeff_token: chroma DC has its own VLC; luma selects a table by the
// predicted non-zero count of neighbouring blocks ---
4111 if(n == CHROMA_DC_BLOCK_INDEX){
4112 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4113 total_coeff= coeff_token>>2;
4115 if(n == LUMA_DC_BLOCK_INDEX){
4116 total_coeff= pred_non_zero_count(h, 0);
4117 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4118 total_coeff= coeff_token>>2;
4120 total_coeff= pred_non_zero_count(h, n);
4121 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4122 total_coeff= coeff_token>>2;
4123 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4127 //FIXME set last_non_zero?
// reject corrupt streams that claim more coefficients than the block holds
4131 if(total_coeff > (unsigned)max_coeff) {
4132 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
// --- up to three trailing +-1 coefficients, decoded from the sign bits ---
4136 trailing_ones= coeff_token&3;
4137 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4138 assert(total_coeff<=16);
4140 i = show_bits(gb, 3);
4141 skip_bits(gb, trailing_ones);
// each sign bit maps to +1 (bit clear) or -1 (bit set)
4142 level[0] = 1-((i&4)>>1);
4143 level[1] = 1-((i&2) );
4144 level[2] = 1-((i&1)<<1);
// --- remaining levels: fast path via the cavlc_level_tab lookup; level_code
// >= 100 signals the slow prefix/suffix path ---
4146 if(trailing_ones<total_coeff) {
4148 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4149 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4150 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4152 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4153 if(level_code >= 100){
4154 prefix= level_code - 100;
4155 if(prefix == LEVEL_TAB_BITS)
4156 prefix += get_level_prefix(gb);
4158 //first coefficient has suffix_length equal to 0 or 1
4159 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4161 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4163 level_code= (prefix<<suffix_length); //part
4164 }else if(prefix==14){
4166 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4168 level_code= prefix + get_bits(gb, 4); //part
4170 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4171 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4173 level_code += (1<<(prefix-3))-4096;
// when fewer than 3 trailing ones were coded, levels of magnitude 1 are
// impossible here, so the code is biased by 2
4176 if(trailing_ones < 3) level_code += 2;
// unsigned->signed zigzag: even codes positive, odd codes negative
4179 mask= -(level_code&1);
4180 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4182 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4185 if(level_code + 3U > 6U)
4187 level[trailing_ones]= level_code;
4190 //remaining coefficients have suffix_length > 0
4191 for(i=trailing_ones+1;i<total_coeff;i++) {
// suffix_length escalates once the decoded magnitude crosses these bounds
4192 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4193 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4194 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4196 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4197 if(level_code >= 100){
4198 prefix= level_code - 100;
4199 if(prefix == LEVEL_TAB_BITS){
4200 prefix += get_level_prefix(gb);
4203 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4205 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4207 level_code += (1<<(prefix-3))-4096;
4209 mask= -(level_code&1);
4210 level_code= (((2+level_code)>>1) ^ mask) - mask;
4212 level[i]= level_code;
4214 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
// --- total_zeros: skipped when the block is full (no room for zeros) ---
4219 if(total_coeff == max_coeff)
4222 if(n == CHROMA_DC_BLOCK_INDEX)
4223 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4225 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter levels back-to-front along the scantable, run_before VLCs
// giving the gap to the next coefficient ---
4228 coeff_num = zeros_left + total_coeff - 1;
4229 j = scantable[coeff_num];
// raw-level branch (qmul unavailable, e.g. DC blocks)
4231 block[j] = level[0];
4232 for(i=1;i<total_coeff;i++) {
4235 else if(zeros_left < 7){
4236 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4238 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4240 zeros_left -= run_before;
4241 coeff_num -= 1 + run_before;
4242 j= scantable[ coeff_num ];
// dequantizing branch: same run logic, levels scaled by qmul with rounding
4247 block[j] = (level[0] * qmul[j] + 32)>>6;
4248 for(i=1;i<total_coeff;i++) {
4251 else if(zeros_left < 7){
4252 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4254 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4256 zeros_left -= run_before;
4257 coeff_num -= 1 + run_before;
4258 j= scantable[ coeff_num ];
4260 block[j]= (level[i] * qmul[j] + 32)>>6;
// reached only when run_before consumed more zeros than were signalled
4265 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts the MBAFF field-decoding flag for a skipped macroblock pair from
 * the left neighbour, falling back to the top neighbour; both must belong to
 * the current slice to be used.
 */
4272 static void predict_field_decoding_flag(H264Context *h){
4273 MpegEncContext * const s = &h->s;
4274 const int mb_xy= h->mb_xy;
4275 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4276 ? s->current_picture.mb_type[mb_xy-1]
4277 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4278 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// (default 0 branch elided in this sampled listing)
4280 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
/**
4284 * decodes a P_SKIP or B_SKIP macroblock
 *
 * B skip derives its motion from direct prediction; P skip uses the
 * pskip motion predictor with ref 0. Both clear the non-zero-count caches
 * and write the synthesized mb_type/motion back to the picture.
 */
4286 static void decode_mb_skip(H264Context *h){
4287 MpegEncContext * const s = &h->s;
4288 const int mb_xy= h->mb_xy;
// a skipped MB has no coded coefficients anywhere
4291 memset(h->non_zero_count[mb_xy], 0, 16);
4292 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4295 mb_type|= MB_TYPE_INTERLACED;
4297 if( h->slice_type_nos == FF_B_TYPE )
4299 // just for fill_caches. pred_direct_motion will set the real mb_type
4300 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4302 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4303 pred_direct_motion(h, &mb_type);
4304 mb_type|= MB_TYPE_SKIP;
// P-skip path: 16x16, list0, ref index 0, predicted MV
4309 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4311 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4312 pred_pskip_motion(h, &mx, &my);
4313 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4314 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4317 write_back_motion(h, mb_type);
4318 s->current_picture.mb_type[mb_xy]= mb_type;
4319 s->current_picture.qscale_table[mb_xy]= s->qscale;
4320 h->slice_table[ mb_xy ]= h->slice_num;
// remembered so the next MB can inherit mb_field_decoding_flag handling
4321 h->prev_mb_skipped= 1;
/**
4325 * decodes a macroblock
4326 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 *
 * Full CAVLC macroblock decode: skip-run handling, mb_type, intra prediction
 * modes or inter motion (8x8 partitions / direct / 16x16 / 16x8 / 8x16),
 * coded_block_pattern, dquant and the residual blocks.
 * NOTE(review): sampled listing — many lines (else branches, returns, some
 * closing braces) are elided between the numbered rows.
 */
4328 static int decode_mb_cavlc(H264Context *h){
4329 MpegEncContext * const s = &h->s;
4331 int partition_count;
4332 unsigned int mb_type, cbp;
4333 int dct8x8_allowed= h->pps.transform_8x8_mode;
4335 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4337 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4338 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- mb_skip_run: only inter slices code skipped MBs ---
4340 if(h->slice_type_nos != FF_I_TYPE){
4341 if(s->mb_skip_run==-1)
4342 s->mb_skip_run= get_ue_golomb(&s->gb);
4344 if (s->mb_skip_run--) {
// MBAFF: top MB of a pair must know the field flag even when skipped
4345 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4346 if(s->mb_skip_run==0)
4347 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4349 predict_field_decoding_flag(h);
4356 if( (s->mb_y&1) == 0 )
4357 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4360 h->prev_mb_skipped= 0;
// --- mb_type, remapped through the per-slice-type info tables ---
4362 mb_type= get_ue_golomb(&s->gb);
4363 if(h->slice_type_nos == FF_B_TYPE){
4365 partition_count= b_mb_type_info[mb_type].partition_count;
4366 mb_type= b_mb_type_info[mb_type].type;
// large mb_type values in B/P slices fall through to the intra table
4369 goto decode_intra_mb;
4371 }else if(h->slice_type_nos == FF_P_TYPE){
4373 partition_count= p_mb_type_info[mb_type].partition_count;
4374 mb_type= p_mb_type_info[mb_type].type;
4377 goto decode_intra_mb;
4380 assert(h->slice_type_nos == FF_I_TYPE);
4381 if(h->slice_type == FF_SI_TYPE && mb_type)
4385 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4389 cbp= i_mb_type_info[mb_type].cbp;
4390 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4391 mb_type= i_mb_type_info[mb_type].type;
4395 mb_type |= MB_TYPE_INTERLACED;
4397 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples, bypasses prediction and residual decoding ---
4399 if(IS_INTRA_PCM(mb_type)){
4402 // We assume these blocks are very rare so we do not optimize it.
4403 align_get_bits(&s->gb);
4405 // The pixels are stored in the same order as levels in h->mb array.
4406 for(x=0; x < (CHROMA ? 384 : 256); x++){
4407 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4410 // In deblocking, the quantizer is 0
4411 s->current_picture.qscale_table[mb_xy]= 0;
4412 // All coeffs are present
4413 memset(h->non_zero_count[mb_xy], 16, 16);
4415 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MBs address twice as many field references
4420 h->ref_count[0] <<= 1;
4421 h->ref_count[1] <<= 1;
4424 fill_caches(h, mb_type, 0);
// --- intra: 4x4 per-block modes or a single 16x16 mode, plus chroma mode ---
4427 if(IS_INTRA(mb_type)){
4429 // init_top_left_availability(h);
4430 if(IS_INTRA4x4(mb_type)){
4433 if(dct8x8_allowed && get_bits1(&s->gb)){
4434 mb_type |= MB_TYPE_8x8DCT;
4438 // fill_intra4x4_pred_table(h);
4439 for(i=0; i<16; i+=di){
4440 int mode= pred_intra_mode(h, i);
// prev_intra4x4_pred_mode_flag==0: 3-bit rem_mode replaces the prediction
4442 if(!get_bits1(&s->gb)){
4443 const int rem_mode= get_bits(&s->gb, 3);
4444 mode = rem_mode + (rem_mode >= mode);
4448 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4450 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4452 write_back_intra_pred_mode(h);
4453 if( check_intra4x4_pred_mode(h) < 0)
4456 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4457 if(h->intra16x16_pred_mode < 0)
4461 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4464 h->chroma_pred_mode= pred_mode;
// --- inter, 8x8 partitions: sub_mb_types, refs, then per-sub-block MVs ---
4466 }else if(partition_count==4){
4467 int i, j, sub_partition_count[4], list, ref[2][4];
4469 if(h->slice_type_nos == FF_B_TYPE){
4471 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4472 if(h->sub_mb_type[i] >=13){
4473 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4476 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4477 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4479 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4480 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4481 pred_direct_motion(h, &mb_type);
// direct sub-blocks must not feed the MV prediction of their neighbours
4482 h->ref_cache[0][scan8[4]] =
4483 h->ref_cache[1][scan8[4]] =
4484 h->ref_cache[0][scan8[12]] =
4485 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4488 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4490 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4491 if(h->sub_mb_type[i] >=4){
4492 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4495 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4496 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// reference indices per 8x8 block; single-entry lists need no bits
4500 for(list=0; list<h->list_count; list++){
4501 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4503 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4504 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4508 }else if(ref_count == 2){
// with exactly two refs, a single inverted bit selects the index
4509 tmp= get_bits1(&s->gb)^1;
4511 tmp= get_ue_golomb_31(&s->gb);
4513 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4526 dct8x8_allowed = get_dct8x8_allowed(h);
4528 for(list=0; list<h->list_count; list++){
4530 if(IS_DIRECT(h->sub_mb_type[i])) {
4531 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4534 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4535 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4537 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4538 const int sub_mb_type= h->sub_mb_type[i];
4539 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4540 for(j=0; j<sub_partition_count[i]; j++){
4542 const int index= 4*i + block_width*j;
4543 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4544 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4545 mx += get_se_golomb(&s->gb);
4546 my += get_se_golomb(&s->gb);
4547 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// replicate the MV into every 4x4 cell the sub-partition covers
4549 if(IS_SUB_8X8(sub_mb_type)){
4551 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4553 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4554 }else if(IS_SUB_8X4(sub_mb_type)){
4555 mv_cache[ 1 ][0]= mx;
4556 mv_cache[ 1 ][1]= my;
4557 }else if(IS_SUB_4X8(sub_mb_type)){
4558 mv_cache[ 8 ][0]= mx;
4559 mv_cache[ 8 ][1]= my;
4561 mv_cache[ 0 ][0]= mx;
4562 mv_cache[ 0 ][1]= my;
4565 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- B direct 16x16 ---
4571 }else if(IS_DIRECT(mb_type)){
4572 pred_direct_motion(h, &mb_type);
4573 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4575 int list, mx, my, i;
4576 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// --- 16x16: one ref + one MV per used list ---
4577 if(IS_16X16(mb_type)){
4578 for(list=0; list<h->list_count; list++){
4580 if(IS_DIR(mb_type, 0, list)){
4581 if(h->ref_count[list]==1){
4583 }else if(h->ref_count[list]==2){
4584 val= get_bits1(&s->gb)^1;
4586 val= get_ue_golomb_31(&s->gb);
4587 if(val >= h->ref_count[list]){
4588 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4593 val= LIST_NOT_USED&0xFF;
4594 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4596 for(list=0; list<h->list_count; list++){
4598 if(IS_DIR(mb_type, 0, list)){
4599 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4600 mx += get_se_golomb(&s->gb);
4601 my += get_se_golomb(&s->gb);
4602 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4604 val= pack16to32(mx,my);
4607 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
// --- 16x8: two horizontal partitions ---
4610 else if(IS_16X8(mb_type)){
4611 for(list=0; list<h->list_count; list++){
4614 if(IS_DIR(mb_type, i, list)){
4615 if(h->ref_count[list] == 1){
4617 }else if(h->ref_count[list] == 2){
4618 val= get_bits1(&s->gb)^1;
4620 val= get_ue_golomb_31(&s->gb);
4621 if(val >= h->ref_count[list]){
4622 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4627 val= LIST_NOT_USED&0xFF;
4628 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4631 for(list=0; list<h->list_count; list++){
4634 if(IS_DIR(mb_type, i, list)){
4635 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4636 mx += get_se_golomb(&s->gb);
4637 my += get_se_golomb(&s->gb);
4638 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4640 val= pack16to32(mx,my);
4643 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
// --- 8x16: two vertical partitions ---
4647 assert(IS_8X16(mb_type));
4648 for(list=0; list<h->list_count; list++){
4651 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4652 if(h->ref_count[list]==1){
4654 }else if(h->ref_count[list]==2){
4655 val= get_bits1(&s->gb)^1;
4657 val= get_ue_golomb_31(&s->gb);
4658 if(val >= h->ref_count[list]){
4659 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4664 val= LIST_NOT_USED&0xFF;
4665 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4668 for(list=0; list<h->list_count; list++){
4671 if(IS_DIR(mb_type, i, list)){
4672 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4673 mx += get_se_golomb(&s->gb);
4674 my += get_se_golomb(&s->gb);
4675 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4677 val= pack16to32(mx,my);
4680 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4686 if(IS_INTER(mb_type))
4687 write_back_motion(h, mb_type);
// --- coded_block_pattern (intra16x16 carries it inside mb_type) ---
4689 if(!IS_INTRA16x16(mb_type)){
4690 cbp= get_ue_golomb(&s->gb);
4692 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4697 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4698 else cbp= golomb_to_inter_cbp [cbp];
// gray (chroma-less) builds use reduced cbp mapping tables
4700 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4701 else cbp= golomb_to_inter_cbp_gray[cbp];
4706 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4707 if(get_bits1(&s->gb)){
4708 mb_type |= MB_TYPE_8x8DCT;
4709 h->cbp_table[mb_xy]= cbp;
4712 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: dquant, then luma (DC+AC / 4x4 / 8x8) and chroma blocks ---
4714 if(cbp || IS_INTRA16x16(mb_type)){
4715 int i8x8, i4x4, chroma_idx;
4717 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4718 const uint8_t *scan, *scan8x8, *dc_scan;
4720 // fill_non_zero_count_cache(h);
4722 if(IS_INTERLACED(mb_type)){
4723 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4724 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4725 dc_scan= luma_dc_field_scan;
4727 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4728 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4729 dc_scan= luma_dc_zigzag_scan;
4732 dquant= get_se_golomb(&s->gb);
4734 if( dquant > 25 || dquant < -26 ){
4735 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's QP update rule
4739 s->qscale += dquant;
4740 if(((unsigned)s->qscale) > 51){
4741 if(s->qscale<0) s->qscale+= 52;
4742 else s->qscale-= 52;
4745 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4746 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4747 if(IS_INTRA16x16(mb_type)){
4748 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4749 return -1; //FIXME continue if partitioned and other return -1 too
4752 assert((cbp&15) == 0 || (cbp&15) == 15);
4755 for(i8x8=0; i8x8<4; i8x8++){
4756 for(i4x4=0; i4x4<4; i4x4++){
4757 const int index= i4x4 + 4*i8x8;
// AC blocks of an I16x16 MB: 15 coefficients, scan starts past the DC slot
4758 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4764 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4767 for(i8x8=0; i8x8<4; i8x8++){
4768 if(cbp & (1<<i8x8)){
4769 if(IS_8x8DCT(mb_type)){
4770 DCTELEM *buf = &h->mb[64*i8x8];
4772 for(i4x4=0; i4x4<4; i4x4++){
4773 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4774 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// fold the four 4x4 nnz values into one for the 8x8 block
4777 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4778 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4780 for(i4x4=0; i4x4<4; i4x4++){
4781 const int index= i4x4 + 4*i8x8;
4783 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4789 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4790 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// chroma DC (cbp bit 4/5) then chroma AC blocks
4796 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4797 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4803 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4804 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4805 for(i4x4=0; i4x4<4; i4x4++){
4806 const int index= 16 + 4*chroma_idx + i4x4;
4807 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4813 uint8_t * const nnz= &h->non_zero_count_cache[0];
4814 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4815 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4818 uint8_t * const nnz= &h->non_zero_count_cache[0];
4819 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4820 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4821 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4823 s->current_picture.qscale_table[mb_xy]= s->qscale;
4824 write_back_non_zero_count(h);
// undo the MBAFF field ref_count doubling from above
4827 h->ref_count[0] >>= 1;
4828 h->ref_count[1] >>= 1;
/**
 * CABAC mb_field_decoding_flag for an MBAFF pair: context 70..72, selected by
 * how many of the left/top neighbouring pairs (same slice) are interlaced.
 */
4834 static int decode_cabac_field_decoding_flag(H264Context *h) {
4835 MpegEncContext * const s = &h->s;
4836 const int mb_x = s->mb_x;
// address the top MB of the current pair
4837 const int mb_y = s->mb_y & ~1;
4838 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4839 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4841 unsigned int ctx = 0;
4843 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4846 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4850 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * CABAC intra mb_type: returns 0 for I4x4, 25 for I_PCM, or 1..24 for an
 * I16x16 type built from cbp-luma, cbp-chroma and pred-mode bits.
 * In intra slices the first bin's context depends on whether the left/top
 * neighbours are non-I4x4; in inter slices a fixed context is used.
 */
4853 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4854 uint8_t *state= &h->cabac_state[ctx_base];
4858 MpegEncContext * const s = &h->s;
4859 const int mba_xy = h->left_mb_xy[0];
4860 const int mbb_xy = h->top_mb_xy;
4862 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4864 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4866 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4867 return 0; /* I4x4 */
4870 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4871 return 0; /* I4x4 */
// terminate bin distinguishes I_PCM from I16x16
4874 if( get_cabac_terminate( &h->cabac ) )
4875 return 25; /* PCM */
4877 mb_type = 1; /* I16x16 */
4878 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4879 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4880 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4881 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4882 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * CABAC mb_type for B slices (contexts 27..31). Returns the b_mb_type_info
 * index: 0 = B_Direct_16x16, 1..2 = B_L[01]_16x16, larger values are
 * bi/mixed 16x8/8x16 types, 22 = B_8x8, >=23 = intra types.
 * The first bin's context counts non-direct left/top neighbours.
 */
4886 static int decode_cabac_mb_type_b( H264Context *h ) {
4887 MpegEncContext * const s = &h->s;
4889 const int mba_xy = h->left_mb_xy[0];
4890 const int mbb_xy = h->top_mb_xy;
4893 assert(h->slice_type_nos == FF_B_TYPE);
4895 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4897 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4900 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4901 return 0; /* B_Direct_16x16 */
4903 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4904 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit suffix; values 13..15 are escapes for intra / special types
4907 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4908 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4909 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4910 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4912 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4913 else if( bits == 13 ) {
4914 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4915 } else if( bits == 14 )
4916 return 11; /* B_L1_L0_8x16 */
4917 else if( bits == 15 )
4918 return 22; /* B_8x8 */
4920 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4921 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/**
 * CABAC mb_skip_flag. Context (11..13 for P, offset +13 for B) counts
 * non-skipped left/top neighbours of the current slice; in MBAFF the
 * neighbour addresses are adjusted for field/frame pairing.
 */
4924 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4925 MpegEncContext * const s = &h->s;
4929 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4930 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4933 && h->slice_table[mba_xy] == h->slice_num
// use the bottom MB of the left pair when field/frame coding differs
4934 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4935 mba_xy += s->mb_stride;
4937 mbb_xy = mb_xy - s->mb_stride;
4939 && h->slice_table[mbb_xy] == h->slice_num
4940 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4941 mbb_xy -= s->mb_stride;
4943 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4945 int mb_xy = h->mb_xy;
4947 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4950 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4952 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4955 if( h->slice_type_nos == FF_B_TYPE )
4957 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * CABAC intra4x4 prediction mode: bin 68 selects "use predicted mode";
 * otherwise three bins (context 69) form rem_intra4x4_pred_mode, which is
 * bumped past the predicted mode to keep the 9-value range.
 */
4960 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4963 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4966 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4967 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4968 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4970 if( mode >= pred_mode )
/**
 * CABAC intra chroma prediction mode (0..3), contexts 64..66: the first
 * bin's context counts left/top neighbours with a non-zero chroma mode,
 * then up to two truncated-unary bins on context 67 (64+3).
 */
4976 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4977 const int mba_xy = h->left_mb_xy[0];
4978 const int mbb_xy = h->top_mb_xy;
4982 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4983 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4986 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4989 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4992 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4994 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * CABAC luma coded_block_pattern (4 bits, one per 8x8 block), contexts
 * 73..76. Each bin's context is derived from whether the neighbouring 8x8
 * blocks (left/top MB cbp, or already-decoded bits of this MB) are coded;
 * -1 for out-of-slice neighbours makes the !() tests treat them as coded.
 */
5000 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5001 int cbp_b, cbp_a, ctx, cbp = 0;
5003 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5004 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5006 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5007 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5008 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5009 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5010 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5011 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5012 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5013 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * CABAC chroma coded_block_pattern (0 = none, 1 = DC only, 2 = DC+AC),
 * contexts 77..84; both bins' contexts look at the neighbours' chroma cbp
 * bits (stored in bits 4-5 of left_cbp/top_cbp).
 */
5016 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5020 cbp_a = (h->left_cbp>>4)&0x03;
5021 cbp_b = (h-> top_cbp>>4)&0x03;
5024 if( cbp_a > 0 ) ctx++;
5025 if( cbp_b > 0 ) ctx += 2;
5026 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// second bin: distinguish DC-only (1) from DC+AC (2)
5030 if( cbp_a == 2 ) ctx++;
5031 if( cbp_b == 2 ) ctx += 2;
5032 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * CABAC mb_qp_delta, contexts 60..63: unary-coded magnitude (first bin's
 * context depends on whether the previous MB had a non-zero delta), then
 * mapped to a signed value — odd counts positive, even counts negative.
 */
5034 static int decode_cabac_mb_dqp( H264Context *h) {
5035 int ctx= h->last_qscale_diff != 0;
5038 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5041 if(val > 102) //prevent infinite loop
5046 return (val + 1)>>1 ;
5048 return -((val + 1)>>1);
/**
 * CABAC P-slice sub_mb_type (contexts 21..23); returns the index into
 * p_sub_mb_type_info (8x8 / 8x4 / 4x8 / 4x4 — exact mapping partly elided
 * in this sampled listing).
 */
5050 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5051 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5053 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5055 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * CABAC B-slice sub_mb_type (contexts 36..39): 0 = B_Direct_8x8,
 * 1..2 = B_L[01]_8x8, then larger indices for bi / smaller partitions,
 * built from up to three suffix bins on context 39.
 */
5059 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5061 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5062 return 0; /* B_Direct_8x8 */
5063 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5064 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5066 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5067 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5068 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5071 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5072 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * CABAC transform_size_8x8_flag, context 399 plus the count of neighbouring
 * MBs already using the 8x8 transform (h->neighbor_transform_size).
 */
5076 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5077 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * CABAC ref_idx for partition n of the given list, contexts 54+: unary code
 * whose first-bin context is built from the cached left/top ref indices
 * (direct-predicted B neighbours are excluded). Capped at 32 to stop a
 * corrupt-stream infinite loop.
 */
5080 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5081 int refa = h->ref_cache[list][scan8[n] - 1];
5082 int refb = h->ref_cache[list][scan8[n] - 8];
5086 if( h->slice_type_nos == FF_B_TYPE) {
5087 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5089 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5098 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5101 if(ref >= 32 /*h->ref_list[list]*/){
/**
 * CABAC motion-vector difference for component l (0 = x, 1 = y):
 * context base 40/47, first-bin context from the summed neighbour MVD
 * magnitudes, then truncated unary up to 9 followed by 3rd-order
 * Exp-Golomb bypass bits, and finally a bypass-coded sign.
 */
5108 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5109 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5110 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5111 int ctxbase = (l == 0) ? 40 : 47;
5113 int ctx = (amvd>2) + (amvd>32);
5115 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5120 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// escape part: exponential-Golomb suffix in bypass mode
5128 while( get_cabac_bypass( &h->cabac ) ) {
5132 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5137 if( get_cabac_bypass( &h->cabac ) )
5141 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Context index for the coded_block_flag of block (cat, idx): combines the
 * non-zero status of the left and top neighbouring blocks (taken from the
 * cbp tables for DC/8x8 cases, from non_zero_count_cache otherwise) and
 * offsets by 4*cat into the cbf context set.
 */
5144 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5150 nza = h->left_cbp&0x100;
5151 nzb = h-> top_cbp&0x100;
5153 nza = (h->left_cbp>>(6+idx))&0x01;
5154 nzb = (h-> top_cbp>>(6+idx))&0x01;
5157 assert(cat == 1 || cat == 2 || cat == 4);
5158 nza = h->non_zero_count_cache[scan8[idx] - 1];
5159 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5168 return ctx + 4 * cat;
/* Maps an 8x8 scan position (0..62) to the context offset used for the
 * last_significant_coeff_flag in CABAC 8x8 residual decoding; shared with
 * the asm significance decoder via DECLARE_ASM_CONST. */
5171 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5172 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5174 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5175 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5178 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5179 static const int significant_coeff_flag_offset[2][6] = {
5180 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5181 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5183 static const int last_coeff_flag_offset[2][6] = {
5184 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5185 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5187 static const int coeff_abs_level_m1_offset[6] = {
5188 227+0, 227+10, 227+20, 227+30, 227+39, 426
5190 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5191 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5192 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5193 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5194 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5195 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5196 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5197 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5198 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5200 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5201 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5202 * map node ctx => cabac ctx for level=1 */
5203 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5204 /* map node ctx => cabac ctx for level>1 */
5205 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5206 static const uint8_t coeff_abs_level_transition[2][8] = {
5207 /* update node ctx after decoding a level=1 */
5208 { 1, 2, 3, 3, 4, 5, 6, 7 },
5209 /* update node ctx after decoding a level>1 */
5210 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Interior of decode_cabac_residual_internal(): decodes one CABAC-coded
 * residual block — coded_block_flag, significance map, then coefficient
 * levels and signs. NOTE(review): the function signature and several
 * interior lines are absent from this excerpt; the code below is preserved
 * byte-for-byte, comments only added. */
5216 int coeff_count = 0;
5219 uint8_t *significant_coeff_ctx_base;
5220 uint8_t *last_coeff_ctx_base;
5221 uint8_t *abs_level_m1_ctx_base;
/* Copy the CABAC engine state into a stack-local context so the hot
 * decode loops work on locals instead of dereferencing h->cabac. */
5224 #define CABAC_ON_STACK
5226 #ifdef CABAC_ON_STACK
5229 cc.range = h->cabac.range;
5230 cc.low = h->cabac.low;
5231 cc.bytestream= h->cabac.bytestream;
5233 #define CC &h->cabac
5237 /* cat: 0-> DC 16x16 n = 0
5238 * 1-> AC 16x16 n = luma4x4idx
5239 * 2-> Luma4x4 n = luma4x4idx
5240 * 3-> DC Chroma n = iCbCr
5241 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5242 * 5-> Luma8x8 n = 4 * luma8x8idx
5245 /* read coded block flag */
5246 if( is_dc || cat != 5 ) {
5247 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* No coefficients coded: clear the nnz cache entry, write the CABAC
 * state back, and return early. */
5249 h->non_zero_count_cache[scan8[n]] = 0;
5251 #ifdef CABAC_ON_STACK
5252 h->cabac.range = cc.range ;
5253 h->cabac.low = cc.low ;
5254 h->cabac.bytestream= cc.bytestream;
/* Context base pointers for this block category; MB_FIELD picks the
 * field vs frame variants of the significance contexts. */
5260 significant_coeff_ctx_base = h->cabac_state
5261 + significant_coeff_flag_offset[MB_FIELD][cat];
5262 last_coeff_ctx_base = h->cabac_state
5263 + last_coeff_flag_offset[MB_FIELD][cat];
5264 abs_level_m1_ctx_base = h->cabac_state
5265 + coeff_abs_level_m1_offset[cat];
5267 if( !is_dc && cat == 5 ) {
/* Significance map: for each scan position decode significant_coeff_flag
 * and, when set, last_significant_coeff_flag; record positions in index[]. */
5268 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5269 for(last= 0; last < coefs; last++) { \
5270 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5271 if( get_cabac( CC, sig_ctx )) { \
5272 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5273 index[coeff_count++] = last; \
5274 if( get_cabac( CC, last_ctx ) ) { \
5280 if( last == max_coeff -1 ) {\
5281 index[coeff_count++] = last;\
5283 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 assembly fast paths for the significance scan when available. */
5284 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5285 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5287 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5289 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5291 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5294 assert(coeff_count > 0);
/* Record coded-block info: 0x100 marks the DC, 0x40<<n the 8x8 blocks,
 * and update the non-zero-count cache for this block. */
5298 h->cbp_table[h->mb_xy] |= 0x100;
5300 h->cbp_table[h->mb_xy] |= 0x40 << n;
5303 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5305 assert( cat == 1 || cat == 2 || cat == 4 );
5306 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Level decoding in reverse scan order: coeff_abs_level_minus1 via the
 * node-context state machine, then the bypass-coded sign; |level| >= 15
 * takes the exp-Golomb escape path. qmul == NULL appears to be the DC
 * case (no dequantization applied here). */
5311 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5313 int j= scantable[index[--coeff_count]];
5315 if( get_cabac( CC, ctx ) == 0 ) {
5316 node_ctx = coeff_abs_level_transition[0][node_ctx];
5318 block[j] = get_cabac_bypass_sign( CC, -1);
5320 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5324 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5325 node_ctx = coeff_abs_level_transition[1][node_ctx];
5327 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5331 if( coeff_abs >= 15 ) {
5333 while( get_cabac_bypass( CC ) ) {
5339 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5345 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5347 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5350 } while( coeff_count );
/* Write the stack-local CABAC state back into the shared context. */
5351 #ifdef CABAC_ON_STACK
5352 h->cabac.range = cc.range ;
5353 h->cabac.low = cc.low ;
5354 h->cabac.bytestream= cc.bytestream;
/* Wrappers around decode_cabac_residual_internal(). In normal builds the
 * DC (cat 0/3) and non-DC paths are specialized so is_dc becomes a
 * compile-time constant; CONFIG_SMALL builds keep one generic call.
 * (Some #else/#endif and closing braces are missing from this excerpt.) */
5359 #ifndef CONFIG_SMALL
5360 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5361 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5364 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5365 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Dispatcher: cat 0 (luma DC) and cat 3 (chroma DC) are the DC cases. */
5369 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5371 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5373 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5374 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock,
 * then corrects them for MBAFF macroblock pairs and field pictures.
 * (Several enclosing-brace lines are missing from this excerpt.) */
5378 static inline void compute_mb_neighbors(H264Context *h)
5380 MpegEncContext * const s = &h->s;
5381 const int mb_xy = h->mb_xy;
5382 h->top_mb_xy = mb_xy - s->mb_stride;
5383 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF path: neighbours are addressed in units of macroblock pairs;
 * pair_xy is the top MB of the current pair. */
5385 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5386 const int top_pair_xy = pair_xy - s->mb_stride;
5387 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5388 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5389 const int curr_mb_field_flag = MB_FIELD;
5390 const int bottom = (s->mb_y & 1);
5392 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5393 h->top_mb_xy -= s->mb_stride;
/* Left neighbour is the pair's top MB when field flags differ. */
5395 if (!left_mb_field_flag == curr_mb_field_flag) {
5396 h->left_mb_xy[0] = pair_xy - 1;
5398 } else if (FIELD_PICTURE) {
5399 h->top_mb_xy -= s->mb_stride;
/* Decodes one CABAC-coded macroblock: skip flags, mb_type, prediction
 * data (intra modes or refs+MVs), CBP and residual coefficients.
 * NOTE(review): many lines (closing braces, #if branches, some
 * statements) are missing from this excerpt; code is byte-preserved. */
5405 * decodes a macroblock
5406 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5408 static int decode_mb_cabac(H264Context *h) {
5409 MpegEncContext * const s = &h->s;
5411 int mb_type, partition_count, cbp = 0;
5412 int dct8x8_allowed= h->pps.transform_8x8_mode;
5414 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5416 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- mb_skip_flag handling (P/B slices only) --- */
5417 if( h->slice_type_nos != FF_I_TYPE ) {
5419 /* a skipped mb needs the aff flag from the following mb */
5420 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5421 predict_field_decoding_flag(h);
5422 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5423 skip = h->next_mb_skipped;
5425 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5426 /* read skip flags */
5428 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5429 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5430 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5431 if(!h->next_mb_skipped)
5432 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset per-MB tables and qscale delta. */
5437 h->cbp_table[mb_xy] = 0;
5438 h->chroma_pred_mode_table[mb_xy] = 0;
5439 h->last_qscale_diff = 0;
5446 if( (s->mb_y&1) == 0 )
5448 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5451 h->prev_mb_skipped = 0;
5453 compute_mb_neighbors(h);
/* --- mb_type decoding, per slice type --- */
5455 if( h->slice_type_nos == FF_B_TYPE ) {
5456 mb_type = decode_cabac_mb_type_b( h );
5458 partition_count= b_mb_type_info[mb_type].partition_count;
5459 mb_type= b_mb_type_info[mb_type].type;
5462 goto decode_intra_mb;
5464 } else if( h->slice_type_nos == FF_P_TYPE ) {
5465 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5467 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5468 /* P_L0_D16x16, P_8x8 */
5469 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5471 /* P_L0_D8x16, P_L0_D16x8 */
5472 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5474 partition_count= p_mb_type_info[mb_type].partition_count;
5475 mb_type= p_mb_type_info[mb_type].type;
5477 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5478 goto decode_intra_mb;
5481 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5482 if(h->slice_type == FF_SI_TYPE && mb_type)
5484 assert(h->slice_type_nos == FF_I_TYPE);
5486 partition_count = 0;
5487 cbp= i_mb_type_info[mb_type].cbp;
5488 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5489 mb_type= i_mb_type_info[mb_type].type;
5492 mb_type |= MB_TYPE_INTERLACED;
5494 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow; realign the CABAC decoder after them --- */
5496 if(IS_INTRA_PCM(mb_type)) {
5499 // We assume these blocks are very rare so we do not optimize it.
5500 // FIXME The two following lines get the bitstream position in the cabac
5501 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5502 ptr= h->cabac.bytestream;
5503 if(h->cabac.low&0x1) ptr--;
5505 if(h->cabac.low&0x1FF) ptr--;
5508 // The pixels are stored in the same order as levels in h->mb array.
5509 memcpy(h->mb, ptr, 256); ptr+=256;
5511 memcpy(h->mb+128, ptr, 128); ptr+=128;
5514 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5516 // All blocks are present
5517 h->cbp_table[mb_xy] = 0x1ef;
5518 h->chroma_pred_mode_table[mb_xy] = 0;
5519 // In deblocking, the quantizer is 0
5520 s->current_picture.qscale_table[mb_xy]= 0;
5521 // All coeffs are present
5522 memset(h->non_zero_count[mb_xy], 16, 16);
5523 s->current_picture.mb_type[mb_xy]= mb_type;
5524 h->last_qscale_diff = 0;
/* MBAFF: refs are doubled while decoding a field MB of a frame pair. */
5529 h->ref_count[0] <<= 1;
5530 h->ref_count[1] <<= 1;
5533 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5535 if( IS_INTRA( mb_type ) ) {
5537 if( IS_INTRA4x4( mb_type ) ) {
5538 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5539 mb_type |= MB_TYPE_8x8DCT;
5540 for( i = 0; i < 16; i+=4 ) {
5541 int pred = pred_intra_mode( h, i );
5542 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5543 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5546 for( i = 0; i < 16; i++ ) {
5547 int pred = pred_intra_mode( h, i );
5548 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5550 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5553 write_back_intra_pred_mode(h);
5554 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5556 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5557 if( h->intra16x16_pred_mode < 0 ) return -1;
5560 h->chroma_pred_mode_table[mb_xy] =
5561 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5563 pred_mode= check_intra_pred_mode( h, pred_mode );
5564 if( pred_mode < 0 ) return -1;
5565 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-macroblock partitions: sub types, refs, MVs --- */
5567 } else if( partition_count == 4 ) {
5568 int i, j, sub_partition_count[4], list, ref[2][4];
5570 if( h->slice_type_nos == FF_B_TYPE ) {
5571 for( i = 0; i < 4; i++ ) {
5572 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5573 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5574 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5576 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5577 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5578 pred_direct_motion(h, &mb_type);
5579 h->ref_cache[0][scan8[4]] =
5580 h->ref_cache[1][scan8[4]] =
5581 h->ref_cache[0][scan8[12]] =
5582 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5583 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5584 for( i = 0; i < 4; i++ )
5585 if( IS_DIRECT(h->sub_mb_type[i]) )
5586 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5590 for( i = 0; i < 4; i++ ) {
5591 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5592 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5593 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per list/partition, with range validation. */
5597 for( list = 0; list < h->list_count; list++ ) {
5598 for( i = 0; i < 4; i++ ) {
5599 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5600 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5601 if( h->ref_count[list] > 1 ){
5602 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5603 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5604 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5612 h->ref_cache[list][ scan8[4*i]+1 ]=
5613 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5618 dct8x8_allowed = get_dct8x8_allowed(h);
/* Motion vectors (and MVDs) for each sub-partition. */
5620 for(list=0; list<h->list_count; list++){
5622 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5623 if(IS_DIRECT(h->sub_mb_type[i])){
5624 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5628 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5629 const int sub_mb_type= h->sub_mb_type[i];
5630 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5631 for(j=0; j<sub_partition_count[i]; j++){
5634 const int index= 4*i + block_width*j;
5635 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5636 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5637 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5639 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5640 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5641 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* Replicate mv/mvd into the cache cells covered by the sub-block shape. */
5643 if(IS_SUB_8X8(sub_mb_type)){
5645 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5647 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5650 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5652 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5653 }else if(IS_SUB_8X4(sub_mb_type)){
5654 mv_cache[ 1 ][0]= mx;
5655 mv_cache[ 1 ][1]= my;
5657 mvd_cache[ 1 ][0]= mx - mpx;
5658 mvd_cache[ 1 ][1]= my - mpy;
5659 }else if(IS_SUB_4X8(sub_mb_type)){
5660 mv_cache[ 8 ][0]= mx;
5661 mv_cache[ 8 ][1]= my;
5663 mvd_cache[ 8 ][0]= mx - mpx;
5664 mvd_cache[ 8 ][1]= my - mpy;
5666 mv_cache[ 0 ][0]= mx;
5667 mv_cache[ 0 ][1]= my;
5669 mvd_cache[ 0 ][0]= mx - mpx;
5670 mvd_cache[ 0 ][1]= my - mpy;
5673 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5674 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5675 p[0] = p[1] = p[8] = p[9] = 0;
5676 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B-direct macroblock --- */
5680 } else if( IS_DIRECT(mb_type) ) {
5681 pred_direct_motion(h, &mb_type);
5682 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5683 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5684 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- whole-MB inter partitions: 16x16, 16x8, 8x16 --- */
5686 int list, mx, my, i, mpx, mpy;
5687 if(IS_16X16(mb_type)){
5688 for(list=0; list<h->list_count; list++){
5689 if(IS_DIR(mb_type, 0, list)){
5691 if(h->ref_count[list] > 1){
5692 ref= decode_cabac_mb_ref(h, list, 0);
5693 if(ref >= (unsigned)h->ref_count[list]){
5694 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5699 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5701 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5703 for(list=0; list<h->list_count; list++){
5704 if(IS_DIR(mb_type, 0, list)){
5705 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5707 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5708 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5709 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5711 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5712 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5714 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5717 else if(IS_16X8(mb_type)){
5718 for(list=0; list<h->list_count; list++){
5720 if(IS_DIR(mb_type, i, list)){
5722 if(h->ref_count[list] > 1){
5723 ref= decode_cabac_mb_ref( h, list, 8*i );
5724 if(ref >= (unsigned)h->ref_count[list]){
5725 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5730 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5732 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5735 for(list=0; list<h->list_count; list++){
5737 if(IS_DIR(mb_type, i, list)){
5738 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5739 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5740 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5741 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5743 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5744 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5746 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5747 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5752 assert(IS_8X16(mb_type));
5753 for(list=0; list<h->list_count; list++){
5755 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5757 if(h->ref_count[list] > 1){
5758 ref= decode_cabac_mb_ref( h, list, 4*i );
5759 if(ref >= (unsigned)h->ref_count[list]){
5760 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5765 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5767 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5770 for(list=0; list<h->list_count; list++){
5772 if(IS_DIR(mb_type, i, list)){
5773 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5774 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5775 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5777 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5778 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5779 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5781 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5782 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5789 if( IS_INTER( mb_type ) ) {
5790 h->chroma_pred_mode_table[mb_xy] = 0;
5791 write_back_motion( h, mb_type );
/* --- coded_block_pattern (except intra16x16, whose cbp came with mb_type) --- */
5794 if( !IS_INTRA16x16( mb_type ) ) {
5795 cbp = decode_cabac_mb_cbp_luma( h );
5797 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5800 h->cbp_table[mb_xy] = h->cbp = cbp;
5802 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5803 if( decode_cabac_mb_transform_size( h ) )
5804 mb_type |= MB_TYPE_8x8DCT;
5806 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding: pick scan tables, read mb_qp_delta, decode coeffs --- */
5808 if( cbp || IS_INTRA16x16( mb_type ) ) {
5809 const uint8_t *scan, *scan8x8, *dc_scan;
5810 const uint32_t *qmul;
5813 if(IS_INTERLACED(mb_type)){
5814 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5815 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5816 dc_scan= luma_dc_field_scan;
5818 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5819 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5820 dc_scan= luma_dc_zigzag_scan;
5823 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5824 if( dqp == INT_MIN ){
5825 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale into the 0..51 range after applying the delta. */
5829 if(((unsigned)s->qscale) > 51){
5830 if(s->qscale<0) s->qscale+= 52;
5831 else s->qscale-= 52;
5833 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5834 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5836 if( IS_INTRA16x16( mb_type ) ) {
5838 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5839 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5842 qmul = h->dequant4_coeff[0][s->qscale];
5843 for( i = 0; i < 16; i++ ) {
5844 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5845 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5848 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5852 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5853 if( cbp & (1<<i8x8) ) {
5854 if( IS_8x8DCT(mb_type) ) {
5855 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5856 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5858 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5859 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5860 const int index = 4*i8x8 + i4x4;
5861 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5863 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5864 //STOP_TIMER("decode_residual")
5868 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5869 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5876 for( c = 0; c < 2; c++ ) {
5877 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5878 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5884 for( c = 0; c < 2; c++ ) {
5885 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5886 for( i = 0; i < 4; i++ ) {
5887 const int index = 16 + 4 * c + i;
5888 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5889 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5893 uint8_t * const nnz= &h->non_zero_count_cache[0];
5894 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5895 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No residual at all: clear the whole nnz cache. */
5898 uint8_t * const nnz= &h->non_zero_count_cache[0];
5899 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5900 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5901 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5902 h->last_qscale_diff = 0;
5905 s->current_picture.qscale_table[mb_xy]= s->qscale;
5906 write_back_non_zero_count(h);
/* Undo the MBAFF ref-count doubling done above. */
5909 h->ref_count[0] >>= 1;
5910 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge. bS < 4 uses the tc0-clipped normal
 * filter, otherwise the strong intra filter (the branch lines selecting
 * between them are missing from this excerpt). */
5917 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5918 const int index_a = qp + h->slice_alpha_c0_offset;
/* alpha/beta thresholds; the +52 offset allows negative indices. */
5919 const int alpha = (alpha_table+52)[index_a];
5920 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5924 tc[0] = (tc0_table+52)[index_a][bS[0]];
5925 tc[1] = (tc0_table+52)[index_a][bS[1]];
5926 tc[2] = (tc0_table+52)[index_a][bS[2]];
5927 tc[3] = (tc0_table+52)[index_a][bS[3]];
5928 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5930 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one vertical chroma edge; chroma tc values are tc0+1 per the
 * spec. (Branch selection lines are missing from this excerpt.) */
5933 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5934 const int index_a = qp + h->slice_alpha_c0_offset;
5935 const int alpha = (alpha_table+52)[index_a];
5936 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5940 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5941 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5942 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5943 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5944 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5946 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF vertical luma edge filter: processes the 16 rows one at a time
 * because bS and qp can differ per row when mixing field/frame MBs.
 * NOTE(review): several lines (bS_index setup, tc clip, closing braces)
 * are missing from this excerpt; code is byte-preserved. */
5950 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5952 for( i = 0; i < 16; i++, pix += stride) {
5958 int bS_index = (i >> 1);
5961 bS_index |= (i & 1);
5964 if( bS[bS_index] == 0 ) {
/* Per-row qp selection: field MBs split top/bottom, frame MBs alternate. */
5968 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5969 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5970 alpha = (alpha_table+52)[index_a];
5971 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal (bS < 4) filter: tc0-clipped deltas on p0/q0, optional p1/q1. */
5973 if( bS[bS_index] < 4 ) {
5974 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
5975 const int p0 = pix[-1];
5976 const int p1 = pix[-2];
5977 const int p2 = pix[-3];
5978 const int q0 = pix[0];
5979 const int q1 = pix[1];
5980 const int q2 = pix[2];
5982 if( FFABS( p0 - q0 ) < alpha &&
5983 FFABS( p1 - p0 ) < beta &&
5984 FFABS( q1 - q0 ) < beta ) {
5988 if( FFABS( p2 - p0 ) < beta ) {
5989 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5992 if( FFABS( q2 - q0 ) < beta ) {
5993 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5997 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5998 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5999 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6000 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) intra filter. */
6003 const int p0 = pix[-1];
6004 const int p1 = pix[-2];
6005 const int p2 = pix[-3];
6007 const int q0 = pix[0];
6008 const int q1 = pix[1];
6009 const int q2 = pix[2];
6011 if( FFABS( p0 - q0 ) < alpha &&
6012 FFABS( p1 - p0 ) < beta &&
6013 FFABS( q1 - q0 ) < beta ) {
6015 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6016 if( FFABS( p2 - p0 ) < beta)
6018 const int p3 = pix[-4];
6020 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6021 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6022 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6025 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6027 if( FFABS( q2 - q0 ) < beta)
6029 const int q3 = pix[3];
6031 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6032 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6033 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6036 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6040 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6041 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6043 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF vertical chroma edge filter: 8 rows, per-row bS/qp selection.
 * NOTE(review): bS_index computation and closing braces are missing from
 * this excerpt; code is byte-preserved. */
6048 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6050 for( i = 0; i < 8; i++, pix += stride) {
6058 if( bS[bS_index] == 0 ) {
6062 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6063 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6064 alpha = (alpha_table+52)[index_a];
6065 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter: chroma uses tc0+1 and only touches p0/q0. */
6067 if( bS[bS_index] < 4 ) {
6068 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6069 const int p0 = pix[-1];
6070 const int p1 = pix[-2];
6071 const int q0 = pix[0];
6072 const int q1 = pix[1];
6074 if( FFABS( p0 - q0 ) < alpha &&
6075 FFABS( p1 - p0 ) < beta &&
6076 FFABS( q1 - q0 ) < beta ) {
6077 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6079 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6080 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6081 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong (bS == 4) chroma filter. */
6084 const int p0 = pix[-1];
6085 const int p1 = pix[-2];
6086 const int q0 = pix[0];
6087 const int q1 = pix[1];
6089 if( FFABS( p0 - q0 ) < alpha &&
6090 FFABS( p1 - p0 ) < beta &&
6091 FFABS( q1 - q0 ) < beta ) {
6093 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6094 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6095 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge (vertical-direction filter).
 * Mirrors filter_mb_edgev; branch selection lines are missing here. */
6101 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6102 const int index_a = qp + h->slice_alpha_c0_offset;
6103 const int alpha = (alpha_table+52)[index_a];
6104 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6108 tc[0] = (tc0_table+52)[index_a][bS[0]];
6109 tc[1] = (tc0_table+52)[index_a][bS[1]];
6110 tc[2] = (tc0_table+52)[index_a][bS[2]];
6111 tc[3] = (tc0_table+52)[index_a][bS[3]];
6112 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6114 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
/* Deblocks one horizontal chroma edge; chroma tc values are tc0+1.
 * Mirrors filter_mb_edgecv; branch selection lines are missing here. */
6118 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6119 const int index_a = qp + h->slice_alpha_c0_offset;
6120 const int alpha = (alpha_table+52)[index_a];
6121 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6125 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6126 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6127 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6128 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6129 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6131 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path macroblock deblocking: falls back to the full filter_mb() in
 * the hard cases, skips entirely when all qp values are below threshold,
 * uses constant bS for intra MBs, and a DSP helper to compute bS
 * otherwise. NOTE(review): the tail of the function (after the FILTER
 * macro uses) is missing from this excerpt; code is byte-preserved. */
6135 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6136 MpegEncContext * const s = &h->s;
6137 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6139 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Cases the fast path cannot handle -> full filter_mb(). */
6143 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6144 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6145 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6146 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6147 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6150 assert(!FRAME_MBAFF);
/* Average qp with the left (qp0) and top (qp1) neighbours per the spec. */
6152 mb_type = s->current_picture.mb_type[mb_xy];
6153 qp = s->current_picture.qscale_table[mb_xy];
6154 qp0 = s->current_picture.qscale_table[mb_xy-1];
6155 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6156 qpc = get_chroma_qp( h, 0, qp );
6157 qpc0 = get_chroma_qp( h, 0, qp0 );
6158 qpc1 = get_chroma_qp( h, 0, qp1 );
6159 qp0 = (qp + qp0 + 1) >> 1;
6160 qp1 = (qp + qp1 + 1) >> 1;
6161 qpc0 = (qpc + qpc0 + 1) >> 1;
6162 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this threshold alpha/beta are 0, so no filtering can occur. */
6163 qp_thresh = 15 - h->slice_alpha_c0_offset;
6164 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6165 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: constant bS (4 on MB edges, 3 inside / on field-pic top). */
6168 if( IS_INTRA(mb_type) ) {
6169 int16_t bS4[4] = {4,4,4,4};
6170 int16_t bS3[4] = {3,3,3,3};
6171 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6172 if( IS_8x8DCT(mb_type) ) {
6173 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6174 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6175 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6176 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6178 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6179 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6180 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6181 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6182 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6183 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6184 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6185 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6187 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6188 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6189 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6190 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6191 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6192 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6193 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6194 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge with the DSP strength helper, then
 * override the first edge to intra strength if a neighbour is intra. */
6197 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6198 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6200 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6202 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6204 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6205 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6206 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6207 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6209 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6210 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6211 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6212 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6214 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6215 bSv[0][0] = 0x0004000400040004ULL;
6216 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6217 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): apply luma + both chroma filters for one edge. */
6219 #define FILTER(hv,dir,edge)\
6220 if(bSv[dir][edge]) {\
6221 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6223 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6224 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6230 } else if( IS_8x8DCT(mb_type) ) {
// Apply the in-loop deblocking filter to one macroblock in one direction
// (dir==0: vertical edges filtered horizontally; dir==1: horizontal edges).
// NOTE(review): this excerpt is elided -- the embedded line numbers jump, so
// several original statements (braces, declarations, early returns) are
// missing between the lines below; do not treat the text as compilable.
6250 static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6251 MpegEncContext * const s = &h->s;
// mbm_xy: the neighbouring macroblock across edge 0 (left for dir==0, top for dir==1).
6253 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6254 const int mbm_type = s->current_picture.mb_type[mbm_xy];
// Reference-index -> frame-number remap tables for the current slice and for
// the neighbour's slice (they may differ across a slice boundary).
6255 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6256 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
// If the neighbour MB does not exist (slice_table sentinel 0xFFFF), skip edge 0.
6257 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
// 16x16 skip MBs need only the outer edge; otherwise all 4 edges are filtered.
6259 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6260 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6261 // how often to recheck mv-based bS when iterating between edges
6262 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6263 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6264 // how often to recheck mv-based bS when iterating along each edge
6265 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
// The MBAFF path may have already filtered the first vertical edge.
6267 if (first_vertical_edge_done) {
// deblocking_filter==2: do not filter across slice boundaries.
6271 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
// MBAFF special case: a frame MB above an interlaced MB pair must filter its
// top edge twice, once per field (see the comment from the original below).
6274 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6275 && !IS_INTERLACED(mb_type)
6276 && IS_INTERLACED(mbm_type)
6278 // This is a special case in the norm where the filtering must
6279 // be done twice (one each of the field) even if we are in a
6280 // frame macroblock.
6282 static const int nnz_idx[4] = {4,5,6,3};
6283 unsigned int tmp_linesize = 2 * linesize;
6284 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6285 int mbn_xy = mb_xy - 2 * s->mb_stride;
// Two field neighbours above: top field MB, then bottom field MB.
6290 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
// Intra on either side of the edge forces the strongest boundary strength.
6291 if( IS_INTRA(mb_type) ||
6292 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6293 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6295 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6296 for( i = 0; i < 4; i++ ) {
// Non-zero coefficients on either side -> bS of 2 (assignment elided here).
6297 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6298 mbn_nnz[nnz_idx[i]] != 0 )
6304 // Do not use s->qscale as luma quantizer because it has not the same
6305 // value in IPCM macroblocks.
6306 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6307 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6308 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// Filter luma and both chroma planes; chroma QP is averaged per plane.
6309 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6310 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6311 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6312 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6313 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Main edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal.
6320 for( edge = start; edge < edges; edge++ ) {
6321 /* mbn_xy: neighbor macroblock */
6322 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6323 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6324 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
// With an 8x8 transform only even (8-pel aligned) edges are filtered.
6328 if( (edge&1) && IS_8x8DCT(mb_type) )
6331 if( IS_INTRA(mb_type) ||
6332 IS_INTRA(mbn_type) ) {
// Intra edge: bS value selection (4 on an outer frame edge, 3 otherwise;
// the selection code is partially elided here).
6335 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6336 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6345 bS[0] = bS[1] = bS[2] = bS[3] = value;
// mask_edge: skip mv-based recheck for edges inside a single partition.
6350 if( edge & mask_edge ) {
6351 bS[0] = bS[1] = bS[2] = bS[3] = 0;
// Field/frame mismatch across the edge forces bS=1 (MBAFF).
6354 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6355 bS[0] = bS[1] = bS[2] = bS[3] = 1;
// Whole-edge check: both sides belong to a single partition, so one
// ref/mv comparison covers the whole edge.
6358 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6359 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6360 int bn_idx= b_idx - (dir ? 8:1);
// bS=1 if refs differ or any mv component differs by >= 1 luma sample
// (4 in quarter-pel units; mvy_limit accounts for field coding).
6363 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6364 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6365 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6366 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// B slices: also compare against the opposite-list (crossed) references.
6369 if(h->slice_type_nos == FF_B_TYPE && v){
6371 for( l = 0; !v && l < 2; l++ ) {
6373 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6374 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6375 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6379 bS[0] = bS[1] = bS[2] = bS[3] = v;
// Per-4x4-block path: compute bS independently for each of the 4 positions
// along the edge.
6385 for( i = 0; i < 4; i++ ) {
6386 int x = dir == 0 ? edge : i;
6387 int y = dir == 0 ? i : edge;
6388 int b_idx= 8 + 4 + x + 8*y;
6389 int bn_idx= b_idx - (dir ? 8:1);
// Residual coefficients on either side -> bS=2 (assignment elided).
6391 if( h->non_zero_count_cache[b_idx] |
6392 h->non_zero_count_cache[bn_idx] ) {
6398 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6399 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6400 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6401 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6407 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6409 for( l = 0; l < 2; l++ ) {
6411 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6412 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6413 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// All-zero bS means nothing to filter on this edge.
6422 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6427 // Do not use s->qscale as luma quantizer because it has not the same
6428 // value in IPCM macroblocks.
6429 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6430 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6431 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6432 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// dir==0: vertical edge at luma column 4*edge; chroma only on even edges
// (chroma is subsampled, so only every other luma edge exists in chroma).
6434 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6435 if( (edge&1) == 0 ) {
6436 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6437 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6438 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6439 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// dir==1: horizontal edge at luma row 4*edge.
6442 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6443 if( (edge&1) == 0 ) {
6444 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6445 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6446 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6447 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// Full (non-fast) deblocking of one macroblock: optional early skip for low
// QP, CAVLC 8x8-DCT non_zero_count fixups, the MBAFF first-vertical-edge
// special case, then filter_mb_dir() for both directions.
// NOTE(review): elided excerpt -- lines are missing between the numbered
// statements below (conditions, braces, declarations).
6453 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6454 MpegEncContext * const s = &h->s;
6455 const int mb_xy= mb_x + mb_y*s->mb_stride;
6456 const int mb_type = s->current_picture.mb_type[mb_xy];
// Field MBs use a tighter vertical mv threshold (2 = half a luma sample).
6457 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6458 int first_vertical_edge_done = 0;
6461 //for sufficiently low qp, filtering wouldn't do anything
6462 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6464 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6465 int qp = s->current_picture.qscale_table[mb_xy];
// Early-out when this MB's QP and both neighbour-averaged QPs are all below
// the threshold (the enclosing condition's first clause is elided here).
6467 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6468 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6473 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6474 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6475 int top_type, left_type[2];
6476 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6477 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6478 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// Rebuild the top/left edge nnz cache entries from the cbp of 8x8-DCT
// neighbours, since their stored non_zero_count is per-8x8, not per-4x4.
6480 if(IS_8x8DCT(top_type)){
6481 h->non_zero_count_cache[4+8*0]=
6482 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6483 h->non_zero_count_cache[6+8*0]=
6484 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6486 if(IS_8x8DCT(left_type[0])){
6487 h->non_zero_count_cache[3+8*1]=
6488 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6490 if(IS_8x8DCT(left_type[1])){
6491 h->non_zero_count_cache[3+8*3]=
6492 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// For the current MB, spread each cbp luma bit over its four 4x4 positions.
6495 if(IS_8x8DCT(mb_type)){
6496 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6497 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6499 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6500 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6502 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6503 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6505 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6506 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
// MBAFF first vertical edge: when the current and left MB pair differ in
// field/frame coding, 8 bS values and 2 QPs per plane are needed
// (the guard's first clauses are elided above this line).
6511 // left mb is in picture
6512 && h->slice_table[mb_xy-1] != 0xFFFF
6513 // and current and left pair do not have the same interlaced type
6514 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6515 // and left mb is in the same slice if deblocking_filter == 2
6516 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6517 /* First vertical edge is different in MBAFF frames
6518 * There are 8 different bS to compute and 2 different Qp
6520 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6521 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6526 int mb_qp, mbn0_qp, mbn1_qp;
6528 first_vertical_edge_done = 1;
6530 if( IS_INTRA(mb_type) )
6531 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6533 for( i = 0; i < 8; i++ ) {
// Map each of the 8 edge positions to the correct left neighbour
// depending on whether the current MB is field- or frame-coded.
6534 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6536 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6538 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6539 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6540 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6542 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
// Average luma/chroma QPs against each of the two left neighbours.
6549 mb_qp = s->current_picture.qscale_table[mb_xy];
6550 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6551 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6552 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6553 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6554 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6555 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6556 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6557 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6558 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6559 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6560 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6561 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6564 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6565 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6566 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6567 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6568 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
// Regular path: filter vertical then horizontal edges. Both call shapes
// appear here; presumably one branch is CONFIG-selected -- the surrounding
// #if/#else lines are elided.
6572 for( dir = 0; dir < 2; dir++ )
6573 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6575 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6576 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
// Decode one slice: CABAC path, CAVLC path, and a third (likely disabled)
// generic path. Each path loops over macroblocks, reports errors/completion
// through ff_er_add_slice(), and advances s->mb_x/s->mb_y.
// NOTE(review): elided excerpt -- loop headers, braces and returns are
// missing between the numbered lines.
6580 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6581 H264Context *h = *(void**)arg;
6582 MpegEncContext * const s = &h->s;
// In partitioned (data-partitioning) mode only AC errors/end are reported.
6583 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6587 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6588 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
6590 if( h->pps.cabac ) {
// CABAC: byte-align, then initialize the arithmetic decoder on the
// remaining bits of the slice.
6594 align_get_bits( &s->gb );
6597 ff_init_cabac_states( &h->cabac);
6598 ff_init_cabac_decoder( &h->cabac,
6599 s->gb.buffer + get_bits_count(&s->gb)/8,
6600 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6601 /* calculate pre-state */
// Initialize all 460 CABAC context states from the QP-dependent init
// tables (I-slice table vs. P/B table chosen by cabac_init_idc).
6602 for( i= 0; i < 460; i++ ) {
6604 if( h->slice_type_nos == FF_I_TYPE )
6605 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6607 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6610 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6612 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// Per-MB CABAC decode loop body.
6617 int ret = decode_mb_cabac(h);
6619 //STOP_TIMER("decode_mb_cabac")
6621 if(ret>=0) hl_decode_mb(h);
6623 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
// MBAFF: decode the bottom MB of the vertical pair as well.
6626 ret = decode_mb_cabac(h);
6628 if(ret>=0) hl_decode_mb(h);
6631 eos = get_cabac_terminate( &h->cabac );
// Bytestream overrun beyond 2 bytes indicates corrupt input.
6633 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6634 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6635 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// Advance to the next MB row when the end of the row is reached.
6639 if( ++s->mb_x >= s->mb_width ) {
6641 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6643 if(FIELD_OR_MBAFF_PICTURE) {
6648 if( eos || s->mb_y >= s->mb_height ) {
6649 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6650 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC per-MB decode loop body.
6657 int ret = decode_mb_cavlc(h);
6659 if(ret>=0) hl_decode_mb(h);
6661 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6663 ret = decode_mb_cavlc(h);
6665 if(ret>=0) hl_decode_mb(h);
6670 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6671 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6676 if(++s->mb_x >= s->mb_width){
6678 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6680 if(FIELD_OR_MBAFF_PICTURE) {
6683 if(s->mb_y >= s->mb_height){
6684 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// Exact bit consumption distinguishes a clean slice end from an error.
6686 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6687 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6691 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6698 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6699 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6700 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6701 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6705 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// Third MB loop (presumably guarded by #if 0 or a config macro in the
// original -- the guard lines are elided). Note it uses s->gb without '&'.
6714 for(;s->mb_y < s->mb_height; s->mb_y++){
6715 for(;s->mb_x < s->mb_width; s->mb_x++){
6716 int ret= decode_mb(h);
6721 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6722 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6727 if(++s->mb_x >= s->mb_width){
6729 if(++s->mb_y >= s->mb_height){
6730 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6731 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// NOTE(review): the next line is garbled by mis-encoding ("s->?gb",
// "s->gb?."); it almost certainly read
//   if(get_bits_count(s->gb) >= s->gb.size_in_bits){
// matching line 6730/6743 -- confirm against an intact copy of the file.
6742 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6743 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6744 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6748 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6755 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6758 return -1; //not reached
// Parse a pic_timing SEI message: HRD delays (if signalled in the SPS),
// pic_struct, and the optional per-clock-timestamp fields.
// NOTE(review): elided excerpt -- a few original lines (error return,
// closing braces, final return) are missing between the numbered lines.
6761 static int decode_picture_timing(H264Context *h){
6762 MpegEncContext * const s = &h->s;
6763 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
// Field lengths for these delays come from the SPS HRD parameters.
6764 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6765 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6767 if(h->sps.pic_struct_present_flag){
6768 unsigned int i, num_clock_ts;
6769 h->sei_pic_struct = get_bits(&s->gb, 4);
// Values beyond FRAME_TRIPLING are invalid (error handling elided here).
6771 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
// Number of clock timestamps is a fixed function of pic_struct.
6774 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6776 for (i = 0 ; i < num_clock_ts ; i++){
6777 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6778 unsigned int full_timestamp_flag;
6779 skip_bits(&s->gb, 2); /* ct_type */
6780 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6781 skip_bits(&s->gb, 5); /* counting_type */
6782 full_timestamp_flag = get_bits(&s->gb, 1);
6783 skip_bits(&s->gb, 1); /* discontinuity_flag */
6784 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6785 skip_bits(&s->gb, 8); /* n_frames */
// Full timestamp: seconds/minutes/hours are always present;
// otherwise each component is individually flagged.
6786 if(full_timestamp_flag){
6787 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6788 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6789 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6791 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6792 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6793 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6794 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6795 if(get_bits(&s->gb, 1)) /* hours_flag */
6796 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6800 if(h->sps.time_offset_length > 0)
6801 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
// Parse an unregistered user-data SEI: capture up to 16+256 bytes, detect
// the x264 encoder version string (used for bug workarounds elsewhere),
// then skip any remaining payload bytes.
// NOTE(review): elided excerpt -- declarations of i/e/build and the final
// return are missing between the numbered lines.
6808 static int decode_unregistered_user_data(H264Context *h, int size){
6809 MpegEncContext * const s = &h->s;
// First 16 bytes are the UUID, the rest is free-form text.
6810 uint8_t user_data[16+256];
6816 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6817 user_data[i]= get_bits(&s->gb, 8);
// Scan the text portion (after the 16-byte UUID) for an x264 build number.
6821 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6822 if(e==1 && build>=0)
6823 h->x264_build= build;
6825 if(s->avctx->debug & FF_DEBUG_BUGS)
6826 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// Consume any payload bytes beyond the local buffer (loop header elided).
6829 skip_bits(&s->gb, 8);
// Parse all SEI messages in the current NAL unit. Each message is a
// (type, size) pair where both values use 255-prefixed byte escaping.
// NOTE(review): elided excerpt -- the do{ headers, the switch statement and
// the final return are missing between the numbered lines.
6834 static int decode_sei(H264Context *h){
6835 MpegEncContext * const s = &h->s;
// Stop when fewer than 16 bits remain (smallest possible message header).
6837 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// payloadType: sum 255-valued bytes until a byte < 255 terminates it.
6842 type+= show_bits(&s->gb, 8);
6843 }while(get_bits(&s->gb, 8) == 255);
// payloadSize: same escaping scheme.
6847 size+= show_bits(&s->gb, 8);
6848 }while(get_bits(&s->gb, 8) == 255);
6851 case 1: // Picture timing SEI
6852 if(decode_picture_timing(h) < 0)
// case 5 (unregistered user data) -- the case label is elided.
6856 if(decode_unregistered_user_data(h, size) < 0)
// Unknown message types are skipped wholesale.
6860 skip_bits(&s->gb, 8*size);
6863 //FIXME check bits here
6864 align_get_bits(&s->gb);
// Parse hrd_parameters() from the VUI: CPB count, rate/size scales, per-CPB
// bitrate/size/cbr, and the delay field lengths stored into the SPS for
// later pic_timing SEI parsing.
// NOTE(review): elided excerpt -- the declarations of cpb_count/i, the
// error return and the final return are missing.
6870 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6871 MpegEncContext * const s = &h->s;
6873 cpb_count = get_ue_golomb_31(&s->gb) + 1;
// The spec allows at most 32 CPBs.
6875 if(cpb_count > 32U){
6876 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6880 get_bits(&s->gb, 4); /* bit_rate_scale */
6881 get_bits(&s->gb, 4); /* cpb_size_scale */
6882 for(i=0; i<cpb_count; i++){
6883 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6884 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6885 get_bits1(&s->gb); /* cbr_flag */
6887 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
// These lengths are needed later by decode_picture_timing().
6888 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6889 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6890 sps->time_offset_length = get_bits(&s->gb, 5);
// Parse vui_parameters() from the SPS: sample aspect ratio, video signal
// info, chroma location, timing info, HRD parameters and bitstream
// restrictions. Most fields are read and discarded; only those the decoder
// uses are stored in the SPS.
// NOTE(review): elided excerpt -- some error returns and closing braces are
// missing between the numbered lines.
6894 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6895 MpegEncContext * const s = &h->s;
6896 int aspect_ratio_info_present_flag;
6897 unsigned int aspect_ratio_idc;
6899 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6901 if( aspect_ratio_info_present_flag ) {
6902 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR: explicit 16-bit num/den; otherwise look up a preset table.
6903 if( aspect_ratio_idc == EXTENDED_SAR ) {
6904 sps->sar.num= get_bits(&s->gb, 16);
6905 sps->sar.den= get_bits(&s->gb, 16);
6906 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6907 sps->sar= pixel_aspect[aspect_ratio_idc];
6909 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6916 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6918 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6919 get_bits1(&s->gb); /* overscan_appropriate_flag */
6922 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6923 get_bits(&s->gb, 3); /* video_format */
6924 get_bits1(&s->gb); /* video_full_range_flag */
6925 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6926 get_bits(&s->gb, 8); /* colour_primaries */
6927 get_bits(&s->gb, 8); /* transfer_characteristics */
6928 get_bits(&s->gb, 8); /* matrix_coefficients */
6932 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6933 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6934 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6937 sps->timing_info_present_flag = get_bits1(&s->gb);
6938 if(sps->timing_info_present_flag){
6939 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6940 sps->time_scale = get_bits_long(&s->gb, 32);
6941 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
// NAL and VCL HRD parameters share the same syntax structure.
6944 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6945 if(sps->nal_hrd_parameters_present_flag)
6946 if(decode_hrd_parameters(h, sps) < 0)
6948 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6949 if(sps->vcl_hrd_parameters_present_flag)
6950 if(decode_hrd_parameters(h, sps) < 0)
6952 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6953 get_bits1(&s->gb); /* low_delay_hrd_flag */
6954 sps->pic_struct_present_flag = get_bits1(&s->gb);
6956 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6957 if(sps->bitstream_restriction_flag){
6958 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6959 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6960 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6961 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6962 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
// num_reorder_frames bounds the output-reorder buffer depth.
6963 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6964 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6966 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6967 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
// Parse one scaling list (16 or 64 entries) into 'factors'. If the matrix
// is absent the prediction fallback is copied; if the first delta yields 0
// the JVT default list is used; otherwise deltas are accumulated in
// zigzag order, repeating the last value once next==0.
// NOTE(review): elided excerpt -- the else{ line and some braces between
// the numbered lines are missing.
6975 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6976 const uint8_t *jvt_list, const uint8_t *fallback_list){
6977 MpegEncContext * const s = &h->s;
6978 int i, last = 8, next = 8;
// 4x4 lists use the 4x4 zigzag, 8x8 lists the 8x8 zigzag.
6979 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6980 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6981 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6983 for(i=0;i<size;i++){
// Deltas are signed Exp-Golomb, accumulated modulo 256.
6985 next = (last + get_se_golomb(&s->gb)) & 0xff;
6986 if(!i && !next){ /* matrix not written, we use the preset one */
6987 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next==0 (after the first entry) means "repeat the previous value".
6990 last = factors[scan[i]] = next ? next : last;
// Parse the full set of scaling matrices for an SPS or PPS. Per the spec's
// prediction rules, each list falls back either to the previously decoded
// list, to the SPS matrices (for a PPS), or to the flat/JVT defaults.
// NOTE(review): elided excerpt -- the closing of the fallback array and
// the function's closing braces are missing.
6994 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6995 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6996 MpegEncContext * const s = &h->s;
// A PPS can fall back to SPS matrices only if the SPS actually carried some.
6997 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6998 const uint8_t *fallback[4] = {
6999 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7000 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7001 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7002 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
// seq/pic_scaling_matrix_present_flag
7004 if(get_bits1(&s->gb)){
7005 sps->scaling_matrix_present |= is_sps;
7006 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7007 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7008 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7009 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7010 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7011 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only in the SPS or when the PPS enables the 8x8 transform.
7012 if(is_sps || pps->transform_8x8_mode){
7013 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7014 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
// NOTE(review): elided excerpt -- error returns, closing braces and some
// declarations are missing between the numbered lines.
7019 static inline int decode_seq_parameter_set(H264Context *h){
7020 MpegEncContext * const s = &h->s;
7021 int profile_idc, level_idc;
7022 unsigned int sps_id;
7026 profile_idc= get_bits(&s->gb, 8);
7027 get_bits1(&s->gb); //constraint_set0_flag
7028 get_bits1(&s->gb); //constraint_set1_flag
7029 get_bits1(&s->gb); //constraint_set2_flag
7030 get_bits1(&s->gb); //constraint_set3_flag
7031 get_bits(&s->gb, 4); // reserved
7032 level_idc= get_bits(&s->gb, 8);
7033 sps_id= get_ue_golomb_31(&s->gb);
7035 if(sps_id >= MAX_SPS_COUNT) {
7036 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
// Allocate a fresh SPS (NULL check presumably follows; line elided).
7039 sps= av_mallocz(sizeof(SPS));
7043 sps->profile_idc= profile_idc;
7044 sps->level_idc= level_idc;
// Default scaling matrices are flat (all 16).
7046 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7047 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7048 sps->scaling_matrix_present = 0;
7050 if(sps->profile_idc >= 100){ //high profile
7051 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7052 if(sps->chroma_format_idc == 3)
7053 get_bits1(&s->gb); //residual_color_transform_flag
7054 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7055 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7056 sps->transform_bypass = get_bits1(&s->gb);
7057 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// Non-high profiles are always 4:2:0.
7059 sps->chroma_format_idc= 1;
7062 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7063 sps->poc_type= get_ue_golomb_31(&s->gb);
7065 if(sps->poc_type == 0){ //FIXME #define
7066 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7067 } else if(sps->poc_type == 1){//FIXME #define
7068 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7069 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7070 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7071 sps->poc_cycle_length = get_ue_golomb(&s->gb);
// Bound check prevents writing past offset_for_ref_frame[].
7073 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7074 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7078 for(i=0; i<sps->poc_cycle_length; i++)
7079 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7080 }else if(sps->poc_type != 2){
7081 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7085 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7086 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7087 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7090 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7091 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7092 sps->mb_height= get_ue_golomb(&s->gb) + 1;
// Guard both against integer overflow of 16*mb_* and codec dimension limits.
7093 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7094 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7095 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7099 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7100 if(!sps->frame_mbs_only_flag)
7101 sps->mb_aff= get_bits1(&s->gb);
7105 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7107 #ifndef ALLOW_INTERLACE
// Interlace support compiled out: warn (the mb_aff reset is elided).
7109 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7111 sps->crop= get_bits1(&s->gb);
// frame_cropping_flag set: read the four crop offsets (if-body start elided).
7113 sps->crop_left = get_ue_golomb(&s->gb);
7114 sps->crop_right = get_ue_golomb(&s->gb);
7115 sps->crop_top = get_ue_golomb(&s->gb);
7116 sps->crop_bottom= get_ue_golomb(&s->gb);
7117 if(sps->crop_left || sps->crop_top){
7118 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7120 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7121 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7127 sps->crop_bottom= 0;
7130 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7131 if( sps->vui_parameters_present_flag )
7132 decode_vui_parameters(h, sps);
7134 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7135 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7136 sps_id, sps->profile_idc, sps->level_idc,
7138 sps->ref_frame_count,
7139 sps->mb_width, sps->mb_height,
7140 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7141 sps->direct_8x8_inference_flag ? "8B8" : "",
7142 sps->crop_left, sps->crop_right,
7143 sps->crop_top, sps->crop_bottom,
7144 sps->vui_parameters_present_flag ? "VUI" : "",
7145 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Replace any previous SPS with the same id.
7148 av_free(h->sps_buffers[sps_id]);
7149 h->sps_buffers[sps_id]= sps;
// Build the luma-QP -> chroma-QP lookup table for chroma plane t, applying
// the PPS chroma_qp_index_offset 'index' and clipping to the valid 0..51
// range. NOTE(review): the return-type line and the declaration of i are
// elided above/below this excerpt.
7157 build_qp_table(PPS *pps, int t, int index)
7160 for(i = 0; i < 52; i++)
7161 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
// Parse a picture parameter set NAL and store it in h->pps_buffers[pps_id].
// NOTE(review): elided excerpt -- error returns, braces, and the /* ... */
// delimiters around the embedded FMO spec table are missing between the
// numbered lines (the table text was a comment in the original).
7164 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7165 MpegEncContext * const s = &h->s;
7166 unsigned int pps_id= get_ue_golomb(&s->gb);
7169 if(pps_id >= MAX_PPS_COUNT) {
7170 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7174 pps= av_mallocz(sizeof(PPS));
// The referenced SPS must already have been decoded.
7177 pps->sps_id= get_ue_golomb_31(&s->gb);
7178 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7179 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7183 pps->cabac= get_bits1(&s->gb);
7184 pps->pic_order_present= get_bits1(&s->gb);
7185 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
// Flexible macroblock ordering (FMO) is parsed past but not supported.
7186 if(pps->slice_group_count > 1 ){
7187 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7188 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7189 switch(pps->mb_slice_group_map_type){
// The following table rows are spec excerpts (comment text in the original).
7192 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7193 | run_length[ i ] |1 |ue(v) |
7198 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7200 | top_left_mb[ i ] |1 |ue(v) |
7201 | bottom_right_mb[ i ] |1 |ue(v) |
7209 | slice_group_change_direction_flag |1 |u(1) |
7210 | slice_group_change_rate_minus1 |1 |ue(v) |
7215 | slice_group_id_cnt_minus1 |1 |ue(v) |
7216 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7218 | slice_group_id[ i ] |1 |u(v) |
7223 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7224 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7225 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7226 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7230 pps->weighted_pred= get_bits1(&s->gb);
7231 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7232 pps->init_qp= get_se_golomb(&s->gb) + 26;
7233 pps->init_qs= get_se_golomb(&s->gb) + 26;
7234 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7235 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7236 pps->constrained_intra_pred= get_bits1(&s->gb);
7237 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7239 pps->transform_8x8_mode= 0;
7240 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// Start from the SPS scaling matrices; PPS extension data may override them.
7241 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7242 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
// Optional PPS extension fields exist only when bits remain before
// bit_length (the rbsp trailing position).
7244 if(get_bits_count(&s->gb) < bit_length){
7245 pps->transform_8x8_mode= get_bits1(&s->gb);
7246 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7247 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7249 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7252 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7253 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7254 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7255 h->pps.chroma_qp_diff= 1;
7257 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7258 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7259 pps_id, pps->sps_id,
7260 pps->cabac ? "CABAC" : "CAVLC",
7261 pps->slice_group_count,
7262 pps->ref_count[0], pps->ref_count[1],
7263 pps->weighted_pred ? "weighted" : "",
7264 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7265 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7266 pps->constrained_intra_pred ? "CONSTR" : "",
7267 pps->redundant_pic_cnt_present ? "REDU" : "",
7268 pps->transform_8x8_mode ? "8x8DCT" : ""
// Replace any previous PPS with the same id.
7272 av_free(h->pps_buffers[pps_id]);
7273 h->pps_buffers[pps_id]= pps;
7281 * Call decode_slice() for each context.
7283 * @param h h264 master context
7284 * @param context_count number of contexts to execute
/**
 * Run decode_slice() for each slice/thread context.
 * NOTE(review): the embedded numeric prefixes are original file line
 * numbers left by the extraction tool; the numbering gaps (e.g. 7289-7291,
 * 7294, 7299-7300) mean lines are missing from this extract, so braces and
 * the if/else structure below appear unbalanced. Code text is untouched.
 */
7286 static void execute_decode_slices(H264Context *h, int context_count){
7287 MpegEncContext * const s = &h->s;
7288 AVCodecContext * const avctx= s->avctx;
// Single context: decode directly on the master context.
7292 if(context_count == 1) {
7293 decode_slice(avctx, &h);
// Multiple contexts (presumably the else branch — the 'else' line is
// missing here): seed each slave context's error state before execution.
7295 for(i = 1; i < context_count; i++) {
7296 hx = h->thread_context[i];
7297 hx->s.error_recognition = avctx->error_recognition;
7298 hx->s.error_count = 0;
// Dispatch decode_slice over all thread contexts in parallel.
7301 avctx->execute(avctx, (void *)decode_slice,
7302 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7304 /* pull back stuff from slices to master context */
7305 hx = h->thread_context[context_count - 1];
7306 s->mb_x = hx->s.mb_x;
7307 s->mb_y = hx->s.mb_y;
7308 s->dropable = hx->s.dropable;
7309 s->picture_structure = hx->s.picture_structure;
// Accumulate per-thread error counts into the master context.
7310 for(i = 1; i < context_count; i++)
7311 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and decode each one.
 * Handles both AVC (length-prefixed) and Annex-B (start-code) framing,
 * and dispatches on nal_unit_type (slice / DPA-DPB-DPC partitions /
 * SEI / SPS / PPS / etc.).
 * NOTE(review): many original lines are absent from this extract (the
 * embedded line numbers jump, e.g. 7333->7345), so loop headers, error
 * paths and closing braces are missing; code text is untouched.
 */
7316 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7317 MpegEncContext * const s = &h->s;
7318 AVCodecContext * const avctx= s->avctx;
7320 H264Context *hx; ///< thread context
7321 int context_count = 0;
7323 h->max_contexts = avctx->thread_count;
// Debug hex dump of the first buffer bytes (guard condition not visible
// in this extract).
7326 for(i=0; i<50; i++){
7327 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Unless decoding in chunks, reset slice state for a fresh picture.
7330 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7331 h->current_slice = 0;
7332 if (!s->first_field)
7333 s->current_picture_ptr= NULL;
// --- per-NAL loop (loop header missing from extract) ---
// AVC framing: read nal_length_size big-endian bytes as the NAL size.
7345 if(buf_index >= buf_size) break;
7347 for(i = 0; i < h->nal_length_size; i++)
7348 nalsize = (nalsize << 8) | buf[buf_index++];
7349 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7354 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
// Annex-B framing: scan for the 00 00 01 start-code prefix.
7359 // start code prefix search
7360 for(; buf_index + 3 < buf_size; buf_index++){
7361 // This should always succeed in the first iteration.
7362 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7366 if(buf_index+3 >= buf_size) break;
// Select the thread context that will decode this NAL, then unescape it.
7371 hx = h->thread_context[context_count];
7373 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7374 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes, then compute payload length in bits minus
// the RBSP trailing bits.
7377 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7379 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7381 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7382 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7385 if (h->is_avc && (nalsize != consumed)){
7386 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7390 buf_index += consumed;
// Skip non-reference NALs when hurrying up / discarding non-refs.
7392 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7393 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
// Dispatch on the NAL unit type (case labels partly missing here).
7398 switch(hx->nal_unit_type){
7400 if (h->nal_unit_type != NAL_IDR_SLICE) {
7401 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7404 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular (non-partitioned) slice path.
7406 init_get_bits(&hx->s.gb, ptr, bit_length);
7408 hx->inter_gb_ptr= &hx->s.gb;
7409 hx->s.data_partitioning = 0;
7411 if((err = decode_slice_header(hx, h)))
7414 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7415 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7416 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7417 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7418 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7419 && avctx->skip_frame < AVDISCARD_ALL)
// Data-partitioned slice path (DPA sets up, DPB/DPC supply the
// intra/inter partitions — case labels not visible in this extract).
7423 init_get_bits(&hx->s.gb, ptr, bit_length);
7425 hx->inter_gb_ptr= NULL;
7426 hx->s.data_partitioning = 1;
7428 err = decode_slice_header(hx, h);
7431 init_get_bits(&hx->intra_gb, ptr, bit_length);
7432 hx->intra_gb_ptr= &hx->intra_gb;
7435 init_get_bits(&hx->inter_gb, ptr, bit_length);
7436 hx->inter_gb_ptr= &hx->inter_gb;
7438 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7439 && s->context_initialized
7441 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7442 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7443 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7444 && avctx->skip_frame < AVDISCARD_ALL)
// SEI path (presumably — the case label is in a missing line).
7448 init_get_bits(&s->gb, ptr, bit_length);
// SPS path: parse sequence parameter set, then derive delay settings.
7452 init_get_bits(&s->gb, ptr, bit_length);
7453 decode_seq_parameter_set(h);
7455 if(s->flags& CODEC_FLAG_LOW_DELAY)
7458 if(avctx->has_b_frames < 2)
7459 avctx->has_b_frames= !s->low_delay;
// PPS path.
7462 init_get_bits(&s->gb, ptr, bit_length);
7464 decode_picture_parameter_set(h, bit_length);
// NAL types with no decode action here.
7468 case NAL_END_SEQUENCE:
7469 case NAL_END_STREAM:
7470 case NAL_FILLER_DATA:
7472 case NAL_AUXILIARY_SLICE:
7475 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush once all thread contexts hold a slice.
7478 if(context_count == h->max_contexts) {
7479 execute_decode_slices(h, context_count);
7484 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7486 /* Slice could not be decoded in parallel mode, copy down
7487 * NAL unit stuff to context 0 and restart. Note that
7488 * rbsp_buffer is not transferred, but since we no longer
7489 * run in parallel mode this should not be an issue. */
7490 h->nal_unit_type = hx->nal_unit_type;
7491 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any remaining queued slices before returning.
7497 execute_decode_slices(h, context_count);
7502 * returns the number of bytes consumed for building the current frame
/**
 * Clamp/normalize the consumed-byte position for the current frame.
 * NOTE(review): the return statement (and possibly an else branch) falls
 * in lines missing from this extract; only the clamping logic is visible.
 */
7504 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7505 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7506 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Top-level decode entry point: parse extradata (avcC) on first call,
 * decode the NAL units of one packet, then reorder/output delayed
 * pictures and fill interlacing/repeat hints on the output frame.
 * NOTE(review): the embedded numeric prefixes are original line numbers;
 * their gaps mean many lines (error returns, braces, else branches) are
 * missing from this extract. Code text is untouched.
 */
7511 static int decode_frame(AVCodecContext *avctx,
7512 void *data, int *data_size,
7513 const uint8_t *buf, int buf_size)
7515 H264Context *h = avctx->priv_data;
7516 MpegEncContext *s = &h->s;
7517 AVFrame *pict = data;
7520 s->flags= avctx->flags;
7521 s->flags2= avctx->flags2;
7523 /* end of stream, output what is still in the buffers */
7524 if (buf_size == 0) {
7528 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC for output.
7529 out = h->delayed_pic[0];
7531 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7532 if(h->delayed_pic[i]->poc < out->poc){
7533 out = h->delayed_pic[i];
// Shift the remaining delayed pictures down over the output slot.
7537 for(i=out_idx; h->delayed_pic[i]; i++)
7538 h->delayed_pic[i] = h->delayed_pic[i+1];
7541 *data_size = sizeof(AVFrame);
7542 *pict= *(AVFrame*)out;
// One-time parse of avcC extradata: SPS/PPS entries are length-prefixed
// with 2 bytes each.
7548 if(h->is_avc && !h->got_avcC) {
7549 int i, cnt, nalsize;
7550 unsigned char *p = avctx->extradata;
7551 if(avctx->extradata_size < 7) {
7552 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7556 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7559 /* sps and pps in the avcC always have length coded with 2 bytes,
7560 so put a fake nal_length_size = 2 while parsing them */
7561 h->nal_length_size = 2;
7562 // Decode sps from avcC
7563 cnt = *(p+5) & 0x1f; // Number of sps
7565 for (i = 0; i < cnt; i++) {
7566 nalsize = AV_RB16(p) + 2;
7567 if(decode_nal_units(h, p, nalsize) < 0) {
7568 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7573 // Decode pps from avcC
7574 cnt = *(p++); // Number of pps
7575 for (i = 0; i < cnt; i++) {
7576 nalsize = AV_RB16(p) + 2;
7577 if(decode_nal_units(h, p, nalsize) != nalsize) {
7578 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7583 // Now store right nal length size, that will be use to parse all other nals
7584 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7585 // Do not reparse avcC
// Annex-B extradata (non-avcC): decode it as plain NAL units once.
7589 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7590 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
// Decode the actual packet payload.
7595 buf_index=decode_nal_units(h, buf, buf_size);
7599 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7600 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7601 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture is complete (or not decoding in chunks): finish and output.
7605 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7606 Picture *out = s->current_picture_ptr;
7607 Picture *cur = s->current_picture_ptr;
7608 int i, pics, cross_idr, out_of_order, out_idx;
7612 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7613 s->current_picture_ptr->pict_type= s->pict_type;
// Apply memory-management control operations and roll POC state forward.
7616 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7617 h->prev_poc_msb= h->poc_msb;
7618 h->prev_poc_lsb= h->poc_lsb;
7620 h->prev_frame_num_offset= h->frame_num_offset;
7621 h->prev_frame_num= h->frame_num;
7624 * FIXME: Error handling code does not seem to support interlaced
7625 * when slices span multiple rows
7626 * The ff_er_add_slice calls don't work right for bottom
7627 * fields; they cause massive erroneous error concealing
7628 * Error marking covers both fields (top and bottom).
7629 * This causes a mismatched s->error_count
7630 * and a bad error table. Further, the error count goes to
7631 * INT_MAX when called for bottom field, because mb_y is
7632 * past end by one (callers fault) and resync_mb_y != 0
7633 * causes problems for the first MB line, too.
7640 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7641 /* Wait for second field. */
7645 cur->repeat_pict = 0;
7647 /* Signal interlacing information externally. */
7648 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7649 if(h->sps.pic_struct_present_flag){
7650 switch (h->sei_pic_struct)
7652 case SEI_PIC_STRUCT_FRAME:
7653 cur->interlaced_frame = 0;
7655 case SEI_PIC_STRUCT_TOP_FIELD:
7656 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7657 case SEI_PIC_STRUCT_TOP_BOTTOM:
7658 case SEI_PIC_STRUCT_BOTTOM_TOP:
7659 cur->interlaced_frame = 1;
7661 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7662 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7663 // Signal the possibility of telecined film externally (pic_struct 5,6)
7664 // From these hints, let the applications decide if they apply deinterlacing.
7665 cur->repeat_pict = 1;
7666 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7668 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7669 // Force progressive here, as doubling interlaced frame is a bad idea.
7670 cur->interlaced_frame = 0;
7671 cur->repeat_pict = 2;
7673 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7674 cur->interlaced_frame = 0;
7675 cur->repeat_pict = 4;
7679 /* Derive interlacing flag from used decoding process. */
7680 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7683 if (cur->field_poc[0] != cur->field_poc[1]){
7684 /* Derive top_field_first from field pocs. */
7685 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7687 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7688 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7689 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7690 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7691 cur->top_field_first = 1;
7693 cur->top_field_first = 0;
7695 /* Most likely progressive */
7696 cur->top_field_first = 0;
7700 //FIXME do something with unavailable reference frames
7702 /* Sort B-frames into display order */
7704 if(h->sps.bitstream_restriction_flag
7705 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7706 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7710 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7711 && !h->sps.bitstream_restriction_flag){
7712 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// Append the current picture to the delayed-output queue.
7717 while(h->delayed_pic[pics]) pics++;
7719 assert(pics <= MAX_DELAYED_PIC_COUNT);
7721 h->delayed_pic[pics++] = cur;
// Keep the frame buffer alive while queued, even if not a reference.
7722 if(cur->reference == 0)
7723 cur->reference = DELAYED_PIC_REF;
// Choose the queued picture with the lowest POC as the output candidate.
7725 out = h->delayed_pic[0];
7727 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7728 if(h->delayed_pic[i]->poc < out->poc){
7729 out = h->delayed_pic[i];
7732 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7734 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames when reordering evidence shows the delay is too small.
7736 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7738 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7740 ((!cross_idr && out->poc > h->outputed_poc + 2)
7741 || cur->pict_type == FF_B_TYPE)))
7744 s->avctx->has_b_frames++;
// Pop the output candidate from the queue when it is due (or forced out).
7747 if(out_of_order || pics > s->avctx->has_b_frames){
7748 out->reference &= ~DELAYED_PIC_REF;
7749 for(i=out_idx; h->delayed_pic[i]; i++)
7750 h->delayed_pic[i] = h->delayed_pic[i+1];
7752 if(!out_of_order && pics > s->avctx->has_b_frames){
7753 *data_size = sizeof(AVFrame);
7755 h->outputed_poc = out->poc;
7756 *pict= *(AVFrame*)out;
7758 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7763 assert(pict->data[0] || !*data_size);
7764 ff_print_debug_info(s, pict);
7765 //printf("out %d\n", (int)pict->data[0]);
7768 /* Return the Picture timestamp as the frame number */
7769 /* we subtract 1 because it is added on utils.c */
7770 avctx->frame_number = s->picture_number - 1;
7772 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with neighbour-macroblock availability flags for the
 * current MB: indices 0-2 are the above-left/above/above-right neighbours
 * (same slice required), 3 is the left neighbour, 4/5 are fixed.
 * NOTE(review): lines 7778-7779 and 7783-7787 (presumably the guard for
 * the first MB row and closing braces) are missing from this extract.
 */
7775 static inline void fill_mb_avail(H264Context *h){
7776 MpegEncContext * const s = &h->s;
7777 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Above-row neighbours: available only when in the same slice (and, for
// the corner neighbours, when the column exists).
7780 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7781 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7782 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// Left neighbour.
7788 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7789 h->mb_avail[4]= 1; //FIXME move out
7790 h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test harness fragment (in the original file this sits under an
 * #ifdef TEST main(); the function signature falls in a gap of this
 * extract). Exercises Exp-Golomb read/write round-trips, the 4x4
 * (I)DCT, the quantizer and the NAL escape/unescape layer.
 * NOTE(review): numeric prefixes are original line numbers; gaps mean
 * declarations, START_TIMER lines and closing braces are missing.
 */
7798 #define SIZE (COUNT*40)
7804 // int int_temp[10000];
7806 AVCodecContext avctx;
7808 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT codes, then read them back ---
7810 init_put_bits(&pb, temp, SIZE);
7811 printf("testing unsigned exp golomb\n");
7812 for(i=0; i<COUNT; i++){
7814 set_ue_golomb(&pb, i);
7815 STOP_TIMER("set_ue_golomb");
7817 flush_put_bits(&pb);
7819 init_get_bits(&gb, temp, 8*SIZE);
7820 for(i=0; i<COUNT; i++){
7823 s= show_bits(&gb, 24);
7826 j= get_ue_golomb(&gb);
7828 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7831 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb round-trip over [-COUNT/2, COUNT/2) ---
7835 init_put_bits(&pb, temp, SIZE);
7836 printf("testing signed exp golomb\n");
7837 for(i=0; i<COUNT; i++){
7839 set_se_golomb(&pb, i - COUNT/2);
7840 STOP_TIMER("set_se_golomb");
7842 flush_put_bits(&pb);
7844 init_get_bits(&gb, temp, 8*SIZE);
7845 for(i=0; i<COUNT; i++){
7848 s= show_bits(&gb, 24);
7851 j= get_se_golomb(&gb);
7852 if(j != i - COUNT/2){
7853 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7856 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: random blocks, measure reconstruction error ---
7860 printf("testing 4x4 (I)DCT\n");
7863 uint8_t src[16], ref[16];
7864 uint64_t error= 0, max_error=0;
7866 for(i=0; i<COUNT; i++){
7868 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7869 for(j=0; j<16; j++){
7870 ref[j]= random()%255;
7871 src[j]= random()%255;
7874 h264_diff_dct_c(block, src, ref, 4);
// Scale coefficients (approximate dequant; *4 then /5 on odd rows/cols).
7877 for(j=0; j<16; j++){
7878 // printf("%d ", block[j]);
7879 block[j]= block[j]*4;
7880 if(j&1) block[j]= (block[j]*4 + 2)/5;
7881 if(j&4) block[j]= (block[j]*4 + 2)/5;
7885 s->dsp.h264_idct_add(ref, block, 4);
7886 /* for(j=0; j<16; j++){
7887 printf("%d ", ref[j]);
7891 for(j=0; j<16; j++){
7892 int diff= FFABS(src[j] - ref[j]);
7895 max_error= FFMAX(max_error, diff);
7898 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer sweep over all 52 QP values ---
7899 printf("testing quantizer\n");
7900 for(qp=0; qp<52; qp++){
7902 src1_block[i]= src2_block[i]= random()%255;
// --- NAL escape/unescape round-trip on random bitstreams ---
7905 printf("Testing NAL layer\n");
7907 uint8_t bitstream[COUNT];
7908 uint8_t nal[COUNT*2];
7910 memset(&h, 0, sizeof(H264Context));
7912 for(i=0; i<COUNT; i++){
7920 for(j=0; j<COUNT; j++){
7921 bitstream[j]= (random() % 255) + 1;
// Inject zero bytes at random positions to trigger escape sequences.
7924 for(j=0; j<zeros; j++){
7925 int pos= random() % COUNT;
7926 while(bitstream[pos] == 0){
7935 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7937 printf("encoding failed\n");
7941 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7945 if(out_length != COUNT){
7946 printf("incorrect length %d %d\n", out_length, COUNT);
7950 if(consumed != nal_length){
7951 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7955 if(memcmp(bitstream, out, COUNT)){
7956 printf("mismatch\n");
7962 printf("Testing RBSP\n");
/**
 * Codec close callback: free RBSP buffers, decoding tables and all
 * stored SPS/PPS parameter sets.
 * NOTE(review): several lines (opening brace, MPV_common_end call and the
 * return, per the numbering gaps) are missing from this extract.
 */
7970 static av_cold int decode_end(AVCodecContext *avctx)
7972 H264Context *h = avctx->priv_data;
7973 MpegEncContext *s = &h->s;
7976 av_freep(&h->rbsp_buffer[0]);
7977 av_freep(&h->rbsp_buffer[1]);
7978 free_tables(h); //FIXME cleanup init stuff perhaps
// Release every cached parameter set (av_freep also NULLs the slots).
7980 for(i = 0; i < MAX_SPS_COUNT; i++)
7981 av_freep(h->sps_buffers + i);
7983 for(i = 0; i < MAX_PPS_COUNT; i++)
7984 av_freep(h->pps_buffers + i);
7988 // memset(h, 0, sizeof(H264Context));
7994 AVCodec h264_decoder = {
7998 sizeof(H264Context),
8003 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8005 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),