2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
36 #include "rectangle.h"
37 #include "vdpau_internal.h"
41 #include "x86/h264_i386.h"
48  * Value of Picture.reference when Picture is not a reference picture, but
49  * is held for delayed output.
51 #define DELAYED_PIC_REF 4
/* CAVLC VLC decoding tables (see the coeff_token / total_zeros / run_before
 * names below) with statically allocated backing storage.  Each *_tables_size
 * constant matches the dimension of the corresponding *_tables array. */
53 static VLC coeff_token_vlc[4];
54 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
55 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
57 static VLC chroma_dc_coeff_token_vlc;
58 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
59 static const int chroma_dc_coeff_token_vlc_table_size = 256;
61 static VLC total_zeros_vlc[15];
62 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
63 static const int total_zeros_vlc_tables_size = 512;
65 static VLC chroma_dc_total_zeros_vlc[3];
66 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
67 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
69 static VLC run_vlc[6];
70 static VLC_TYPE run_vlc_tables[6][8][2];
71 static const int run_vlc_tables_size = 8;
74 static VLC_TYPE run7_vlc_table[96][2];
75 static const int run7_vlc_table_size = 96;
/* Forward declarations for helpers defined later in this file:
 * SVQ3 dequant/IDCT variants, the in-loop deblocking filter (normal and
 * fast paths) and long-term reference removal. */
77 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
78 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
79 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Pack the low 16 bits of a and b into one 32-bit word.
 * Which argument occupies the high half depends on WORDS_BIGENDIAN, so the
 * in-memory byte layout is the same on both endiannesses.
 */
83 static av_always_inline uint32_t pack16to32(int a, int b){
84 #ifdef WORDS_BIGENDIAN
85 return (b&0xFFFF) + (a<<16);
87 return (a&0xFFFF) + (b<<16);
/* rem6[q] == q % 6 for 0 <= q < 52 (the H.264 QP range), as a lookup to
 * avoid a division. */
91 static const uint8_t rem6[52]={
92 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* div6[q] == q / 6 for 0 <= q < 52. */
95 static const uint8_t div6[52]={
96 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Remapping tables for the left neighbour's 4x4 block indices; which of the
 * four variants is used is selected in fill_caches() depending on the
 * MBAFF field/frame configuration of the current and left macroblock pair. */
99 static const uint8_t left_block_options[4][8]={
/* Number of bitstream bits indexed at once by the CAVLC level table. */
106 #define LEVEL_TAB_BITS 8
107 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fill the per-macroblock neighbour caches (intra prediction modes,
 * non-zero-count, CBP, motion vectors, reference indices, mvd, direct flags)
 * from the neighbouring macroblocks, so that later decoding/deblocking code
 * can address neighbours through the flat scan8-indexed cache arrays.
 *
 * @param h          decoder context
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero when filling caches for the deblocking filter
 *                    (uses slice-table validity instead of same-slice checks)
 */
109 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
110 MpegEncContext * const s = &h->s;
111 const int mb_xy= h->mb_xy;
112 int topleft_xy, top_xy, topright_xy, left_xy[2];
113 int topleft_type, top_type, topright_type, left_type[2];
114 const uint8_t * left_block;
115 int topleft_partition= -1;
118 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
120 //FIXME deblocking could skip the intra and nnz parts.
121 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
124 /* Wow, what a mess, why didn't they simplify the interlacing & intra
125 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour indices: the straightforward raster-order
 * neighbours of mb_xy. */
127 topleft_xy = top_xy - 1;
128 topright_xy= top_xy + 1;
129 left_xy[1] = left_xy[0] = mb_xy-1;
130 left_block = left_block_options[0];
/* MBAFF branch: neighbours are derived from the macroblock *pair* above /
 * to the left, adjusted by the field/frame coding of each pair. */
132 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
133 const int top_pair_xy = pair_xy - s->mb_stride;
134 const int topleft_pair_xy = top_pair_xy - 1;
135 const int topright_pair_xy = top_pair_xy + 1;
136 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
137 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
138 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
139 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
140 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
141 const int bottom = (s->mb_y & 1);
142 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
144 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
145 top_xy -= s->mb_stride;
147 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
155 topright_xy -= s->mb_stride;
157 if (left_mb_field_flag != curr_mb_field_flag) {
158 left_xy[1] = left_xy[0] = pair_xy - 1;
159 if (curr_mb_field_flag) {
160 left_xy[1] += s->mb_stride;
161 left_block = left_block_options[3];
163 left_block= left_block_options[2 - bottom];
168 h->top_mb_xy = top_xy;
169 h->left_mb_xy[0] = left_xy[0];
170 h->left_mb_xy[1] = left_xy[1];
/* For deblocking: a neighbour is valid whenever its slice_table entry is a
 * real slice number (< 0xFFFF), not only when it is in the same slice. */
174 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
175 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
176 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
178 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 for(list=0; list<h->list_count; list++){
181 //These values where changed for ease of performing MC, we need to change them back
182 //FIXME maybe we can make MC and loop filter use the same values or prevent
183 //the MC code from changing ref_cache and rather use a temporary array.
184 if(USES_LIST(mb_type,list)){
185 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
/* Normal decode path: a neighbour only counts when it belongs to the same
 * slice as the current macroblock. */
195 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
196 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
197 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
198 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
199 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: compute per-4x4/8x8 sample-availability bitmasks.
 * With constrained_intra_pred only intra neighbours count (type_mask). */
201 if(IS_INTRA(mb_type)){
202 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
203 h->topleft_samples_available=
204 h->top_samples_available=
205 h->left_samples_available= 0xFFFF;
206 h->topright_samples_available= 0xEEEA;
208 if(!(top_type & type_mask)){
209 h->topleft_samples_available= 0xB3FF;
210 h->top_samples_available= 0x33FF;
211 h->topright_samples_available= 0x26EA;
213 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
214 if(IS_INTERLACED(mb_type)){
215 if(!(left_type[0] & type_mask)){
216 h->topleft_samples_available&= 0xDFFF;
217 h->left_samples_available&= 0x5FFF;
219 if(!(left_type[1] & type_mask)){
220 h->topleft_samples_available&= 0xFF5F;
221 h->left_samples_available&= 0xFF5F;
224 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
225 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
226 assert(left_xy[0] == left_xy[1]);
227 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
228 h->topleft_samples_available&= 0xDF5F;
229 h->left_samples_available&= 0x5F5F;
233 if(!(left_type[0] & type_mask)){
234 h->topleft_samples_available&= 0xDF5F;
235 h->left_samples_available&= 0x5F5F;
239 if(!(topleft_type & type_mask))
240 h->topleft_samples_available&= 0x7FFF;
242 if(!(topright_type & type_mask))
243 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: import the neighbours' 4x4 prediction modes into the cache
 * (or a fallback value when the neighbour is unusable). */
245 if(IS_INTRA4x4(mb_type)){
246 if(IS_INTRA4x4(top_type)){
247 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
248 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
249 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
250 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
253 if(!(top_type & type_mask))
258 h->intra4x4_pred_mode_cache[4+8*0]=
259 h->intra4x4_pred_mode_cache[5+8*0]=
260 h->intra4x4_pred_mode_cache[6+8*0]=
261 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 if(IS_INTRA4x4(left_type[i])){
265 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
266 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
269 if(!(left_type[i] & type_mask))
274 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
275 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Non-zero-count cache: copy the neighbours' per-block coefficient counts;
 * unavailable neighbours get 0 (CABAC inter) or 64 otherwise. */
291 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 h->non_zero_count_cache[4+8*0]=
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
313 h->non_zero_count_cache[1+8*3]=
314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
318 for (i=0; i<2; i++) {
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top / left neighbours, used for CABAC context selection. */
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct: fill the motion-vector and reference-index caches from the
 * top, left, top-left and top-right neighbours for each reference list. */
358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 for(list=0; list<h->list_count; list++){
361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
369 h->mv_cache_clean[list]= 0;
371 if(USES_LIST(top_type, list)){
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
407 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
410 if(USES_LIST(topleft_type, list)){
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
420 if(USES_LIST(topright_type, list)){
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
436 h->ref_cache[list][scan8[4 ]] =
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 /* XXX beurk, Load mvd */
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 if(USES_LIST(left_type[0], list)){
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 if(USES_LIST(left_type[1], list)){
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: fill the direct-mode flag cache from neighbours. */
480 if(h->slice_type_nos == FF_B_TYPE){
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500 if(IS_DIRECT(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* MBAFF: rescale cached refs/vertical mv components between frame and field
 * units depending on each neighbour's interlacing, via MAP_F2F applied at
 * every neighbour cache position above. */
522 #define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
531 #define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copy the current macroblock's 4x4 intra prediction modes from the
 * scan8-indexed cache back into the persistent per-mb intra4x4_pred_mode
 * array (only the entries later macroblocks need as neighbours).
 */
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 const int mb_xy= h->mb_xy;
551 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
552 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
553 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
554 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
555 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
556 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
557 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
561  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Returns the (possibly remapped) modes in the cache; an entry of -1 in the
 * top[]/left[] remap tables marks a mode that is invalid without that
 * neighbour and triggers the error path below. */
563 static inline int check_intra4x4_pred_mode(H264Context *h){
564 MpegEncContext * const s = &h->s;
565 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
566 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top row unavailable: remap each cached mode through top[]. */
569 if(!(h->top_samples_available&0x8000)){
571 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
576 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left column: check each of the four 4x4 rows independently. */
581 if((h->left_samples_available&0x8888)!=0x8888){
582 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 if(!(h->left_samples_available&mask[i])){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 } //FIXME cleanup like next
600  * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* 16x16 / chroma variant: validates a single mode value and remaps DC modes
 * through the top[]/left[] tables when neighbours are missing. */
602 static inline int check_intra_pred_mode(H264Context *h, int mode){
603 MpegEncContext * const s = &h->s;
604 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
605 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
612 if(!(h->top_samples_available&0x8000)){
615 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
620 if((h->left_samples_available&0x8080) != 0x8080){
622 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
/* Only one of the two left field halves is available: switch to the
 * special ALZHEIMER_DC_* prediction variants. */
623 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
635  * gets the predicted intra4x4 prediction mode.
/* Prediction is the minimum of the left and top cached modes; a negative
 * value (unavailable neighbour) falls back to DC_PRED. */
637 static inline int pred_intra_mode(H264Context *h, int n){
638 const int index8= scan8[n];
639 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
640 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
641 const int min= FFMIN(left, top);
643 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
645 if(min<0) return DC_PRED;
/**
 * Copy the per-4x4-block non-zero coefficient counts from the scan8-indexed
 * cache back into the persistent non_zero_count array for the current mb
 * (luma entries 0-7 and chroma entries 8-12).
 */
649 static inline void write_back_non_zero_count(H264Context *h){
650 const int mb_xy= h->mb_xy;
652 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
653 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
654 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
655 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
656 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
657 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
658 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
660 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
661 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
662 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
664 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
665 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
666 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
670  * gets the predicted number of non-zero coefficients.
671  * @param n block index
/* Averages (rounding up) the left and top neighbours' counts; values >= 64
 * mark unavailable neighbours and are excluded from the average. */
673 static inline int pred_non_zero_count(H264Context *h, int n){
674 const int index8= scan8[n];
675 const int left= h->non_zero_count_cache[index8 - 1];
676 const int top = h->non_zero_count_cache[index8 - 8];
679 if(i<64) i= (i+1)>>1;
681 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetch the top-right ("diagonal") neighbour motion vector for mv
 * prediction, falling back to the top-left neighbour when the top-right
 * is unavailable.  In MBAFF pictures the neighbour may be coded in the
 * opposite field/frame mode, so the vector is rescaled via SET_DIAG_MV.
 *
 * @param C out: points at the chosen neighbour mv
 * @return the reference index of the chosen neighbour
 */
686 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
687 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
688 MpegEncContext *s = &h->s;
690 /* there is no consistent mapping of mvs to neighboring locations that will
691 * make mbaff happy, so we can't move all this logic to fill_caches */
693 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
696 *C = h->mv_cache[list][scan8[0]-2];
699 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
700 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
701 if(IS_INTERLACED(mb_types[topright_xy])){
702 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
703 const int x4 = X4, y4 = Y4;\
704 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
705 if(!USES_LIST(mb_type,list))\
706 return LIST_NOT_USED;\
707 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
708 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
709 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
710 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
712 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
715 if(topright_ref == PART_NOT_AVAILABLE
716 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
717 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
720 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
726 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use the cached top-right mv when available, otherwise
 * the top-left one. */
732 if(topright_ref != PART_NOT_AVAILABLE){
733 *C= h->mv_cache[list][ i - 8 + part_width ];
736 tprintf(s->avctx, "topright MV not available\n");
738 *C= h->mv_cache[list][ i - 8 - 1 ];
739 return h->ref_cache[list][ i - 8 - 1 ];
744  * gets the predicted MV.
745  * @param n the block index
746  * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
747  * @param mx the x component of the predicted motion vector
748  * @param my the y component of the predicted motion vector
/* Standard H.264 median mv prediction over the left (A), top (B) and
 * diagonal (C) neighbours; falls back to a single neighbour when exactly
 * one shares the target reference index. */
750 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
751 const int index8= scan8[n];
752 const int top_ref= h->ref_cache[list][ index8 - 8 ];
753 const int left_ref= h->ref_cache[list][ index8 - 1 ];
754 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
755 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 int diagonal_ref, match_count;
759 assert(part_width==1 || part_width==2 || part_width==4);
769 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
770 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
771 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
772 if(match_count > 1){ //most common
773 *mx= mid_pred(A[0], B[0], C[0]);
774 *my= mid_pred(A[1], B[1], C[1]);
775 }else if(match_count==1){
779 }else if(top_ref==ref){
/* No unique match: use A alone if only the left neighbour exists,
 * else the median of all three. */
787 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
791 *mx= mid_pred(A[0], B[0], C[0]);
792 *my= mid_pred(A[1], B[1], C[1]);
796 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
800  * gets the directionally predicted 16x8 MV.
801  * @param n the block index
802  * @param mx the x component of the predicted motion vector
803  * @param my the y component of the predicted motion vector
/* 16x8 partitions may take the mv directly from the top (upper partition)
 * or left (lower partition) neighbour when it has the same reference;
 * otherwise fall back to the generic median prediction. */
805 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
808 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
810 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
819 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
821 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
831 pred_motion(h, n, 4, list, ref, mx, my);
835  * gets the directionally predicted 8x16 MV.
836  * @param n the block index
837  * @param mx the x component of the predicted motion vector
838  * @param my the y component of the predicted motion vector
/* 8x16 partitions may take the mv from the left (left partition) or the
 * diagonal (right partition) neighbour when it has the same reference;
 * otherwise fall back to the generic median prediction. */
840 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
843 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
845 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
856 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
858 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
860 if(diagonal_ref == ref){
868 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Motion prediction for P-skip macroblocks: the mv is zero when either
 * neighbour is unavailable or when a list-0 neighbour has reference 0 and a
 * zero mv; otherwise it is the normal median prediction for ref 0.
 */
871 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
872 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
873 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
875 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
877 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
878 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
879 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
885 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Compute the temporal-direct distance scale factor for reference i of
 * list 0, from the POC distances tb/td clipped to [-128,127]; long-term
 * references and td==0 take an early-out path (not visible in this excerpt).
 */
890 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
891 int poc0 = h->ref_list[0][i].poc;
892 int td = av_clip(poc1 - poc0, -128, 127);
893 if(td == 0 || h->ref_list[0][i].long_ref){
896 int tb = av_clip(poc - poc0, -128, 127);
897 int tx = (16384 + (FFABS(td) >> 1)) / td;
898 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fill h->dist_scale_factor[] (and, for the field case, the per-field
 * dist_scale_factor_field[][] tables) with the temporal-direct scale
 * factors for every list-0 reference.
 */
902 static inline void direct_dist_scale_factor(H264Context * const h){
903 MpegEncContext * const s = &h->s;
904 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
905 const int poc1 = h->ref_list[1][0].poc;
907 for(field=0; field<2; field++){
908 const int poc = h->s.current_picture_ptr->field_poc[field];
909 const int poc1 = h->ref_list[1][0].field_poc[field];
910 for(i=0; i < 2*h->ref_count[0]; i++)
911 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/**
 * Build the col->list0 reference index map used by temporal direct mode:
 * for each reference of the co-located (list 1) picture, find the entry in
 * the current list that refers to the same picture (matched via
 * 4*frame_num + reference&3, with field adjustment when interlaced).
 *
 * @param mbafi nonzero when building the per-field MBAFF maps (entries
 *              offset by 16)
 */
919 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
920 MpegEncContext * const s = &h->s;
921 Picture * const ref1 = &h->ref_list[1][0];
922 int j, old_ref, rfield;
923 int start= mbafi ? 16 : 0;
924 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
925 int interl= mbafi || s->picture_structure != PICT_FRAME;
927 /* bogus; fills in for missing frames */
928 memset(map[list], 0, sizeof(map[list]));
930 for(rfield=0; rfield<2; rfield++){
931 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
932 int poc = ref1->ref_poc[colfield][list][old_ref];
936 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
937 poc= (poc&~3) + rfield + 1;
939 for(j=start; j<end; j++){
940 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
941 int cur_ref= mbafi ? (j-16)^field : j;
942 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 map[list][old_ref] = cur_ref;
/* Per-slice init for direct-mode prediction: records the current picture's
 * reference counts/POC keys (per structure-parity side sidx), then builds the
 * colocated-to-list0 reference maps via fill_colmap() for B-slices using
 * temporal direct prediction.
 * NOTE(review): several interior lines are elided in this view. */
952 static inline void direct_ref_list_init(H264Context * const h){
953 MpegEncContext * const s = &h->s;
954 Picture * const ref1 = &h->ref_list[1][0];
955 Picture * const cur = s->current_picture_ptr;
/* side index derived from picture structure / reference parity bit */
957 int sidx= (s->picture_structure&1)^1;
958 int ref1sidx= (ref1->reference&1)^1;
960 for(list=0; list<2; list++){
961 cur->ref_count[sidx][list] = h->ref_count[list];
962 for(j=0; j<h->ref_count[list]; j++)
/* packed key: 4*frame_num + low reference bits; matched in fill_colmap() */
963 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
966 if(s->picture_structure == PICT_FRAME){
/* frame pictures: both parity sides share the same counts/POCs */
967 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
968 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
971 cur->mbaff= FRAME_MBAFF;
/* maps are only needed for temporal direct in B slices */
973 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
976 for(list=0; list<2; list++){
977 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
978 for(field=0; field<2; field++)
979 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/* Direct-mode motion prediction for a B macroblock: derives mv_cache /
 * ref_cache / sub_mb_type for direct-coded (sub)blocks, either spatially
 * (from neighbours) or temporally (by scaling the colocated L1 picture's
 * motion). Also patches *mb_type with the derived partition flags.
 * NOTE(review): this function is long and many interior lines (declarations,
 * else-branches, closing braces) are elided in this view; comments below only
 * describe what the visible lines show. */
983 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
984 MpegEncContext * const s = &h->s;
985 int b8_stride = h->b8_stride;
986 int b4_stride = h->b_stride;
987 int mb_xy = h->mb_xy;
989 const int16_t (*l1mv0)[2], (*l1mv1)[2];
990 const int8_t *l1ref0, *l1ref1;
991 const int is_b8x8 = IS_8X8(*mb_type);
992 unsigned int sub_mb_type;
995 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* --- locate the colocated MB in the L1 picture, handling all
 *     frame/field (AFR/AFL/FR/FL) combinations --- */
997 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
998 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
999 int cur_poc = s->current_picture_ptr->poc;
1000 int *col_poc = h->ref_list[1]->field_poc;
/* pick the colocated field temporally closest to the current picture */
1001 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1002 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1004 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1005 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1006 mb_xy += s->mb_stride*fieldoff;
1009 }else{ // AFL/AFR/FR/FL -> AFR/FR
1010 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1011 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1012 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1013 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1016 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1017 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1018 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 }else{ // AFR/FR -> AFR/FR
1029 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1030 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1031 /* FIXME save sub mb types from previous frames (or derive from MVs)
1032 * so we know exactly what block size to use */
1033 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1034 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1035 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1036 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1037 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
/* --- pointers into the colocated picture's MV / ref-index planes --- */
1045 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1046 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1047 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1048 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
/* advance to the bottom half of the colocated MB pair (condition elided) */
1051 l1ref0 += h->b8_stride;
1052 l1ref1 += h->b8_stride;
1053 l1mv0 += 2*b4_stride;
1054 l1mv1 += 2*b4_stride;
/* ================= spatial direct prediction ================= */
1058 if(h->direct_spatial_mv_pred){
1063 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1065 /* ref = min(neighbors) */
1066 for(list=0; list<2; list++){
1067 int refa = h->ref_cache[list][scan8[0] - 1];
1068 int refb = h->ref_cache[list][scan8[0] - 8];
1069 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1070 if(refc == PART_NOT_AVAILABLE)
1071 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned min so PART_NOT_AVAILABLE (negative) loses to valid refs */
1072 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1077 if(ref[0] < 0 && ref[1] < 0){
/* no usable neighbour refs: spec says use ref 0 with zero MVs */
1078 ref[0] = ref[1] = 0;
1079 mv[0][0] = mv[0][1] =
1080 mv[1][0] = mv[1][1] = 0;
1082 for(list=0; list<2; list++){
1084 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1086 mv[list][0] = mv[list][1] = 0;
/* drop the unused prediction list from the MB/sub-MB types */
1092 *mb_type &= ~MB_TYPE_L1;
1093 sub_mb_type &= ~MB_TYPE_L1;
1094 }else if(ref[0] < 0){
1096 *mb_type &= ~MB_TYPE_L0;
1097 sub_mb_type &= ~MB_TYPE_L0;
/* current MB and colocated MB differ in frame/field coding */
1100 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1101 for(i8=0; i8<4; i8++){
1104 int xy8 = x8+y8*b8_stride;
1105 int xy4 = 3*x8+y8*b4_stride;
1108 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1110 h->sub_mb_type[i8] = sub_mb_type;
1112 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated "moving test": near-zero colocated MV to ref 0 forces 0 MVs */
1114 if(!IS_INTRA(mb_type_col[y8])
1115 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1116 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1118 a= pack16to32(mv[0][0],mv[0][1]);
1120 b= pack16to32(mv[1][0],mv[1][1]);
1122 a= pack16to32(mv[0][0],mv[0][1]);
1123 b= pack16to32(mv[1][0],mv[1][1]);
1125 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1126 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1128 }else if(IS_16X16(*mb_type)){
1131 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1132 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1133 if(!IS_INTRA(mb_type_col[0])
1134 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1135 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 before build 34 got this case wrong; keep bitstream compatibility */
1136 && (h->x264_build>33 || !h->x264_build)))){
1138 a= pack16to32(mv[0][0],mv[0][1]);
1140 b= pack16to32(mv[1][0],mv[1][1]);
1142 a= pack16to32(mv[0][0],mv[0][1]);
1143 b= pack16to32(mv[1][0],mv[1][1]);
1145 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1146 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 partitions: per-8x8 (and possibly per-4x4) handling */
1148 for(i8=0; i8<4; i8++){
1149 const int x8 = i8&1;
1150 const int y8 = i8>>1;
1152 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1154 h->sub_mb_type[i8] = sub_mb_type;
1156 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1157 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1158 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1159 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1162 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1163 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1164 && (h->x264_build>33 || !h->x264_build)))){
1165 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1166 if(IS_SUB_8X8(sub_mb_type)){
1167 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1168 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1170 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1172 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1175 for(i4=0; i4<4; i4++){
1176 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1177 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1179 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1181 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ================= temporal direct prediction ================= */
1187 }else{ /* direct temporal mv pred */
1188 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1189 const int *dist_scale_factor = h->dist_scale_factor;
/* field MBs in MBAFF frames use the per-parity maps/scale factors */
1192 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1193 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1194 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1195 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1197 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
/* frame/field mismatch: scale vertically with y_shift */
1200 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1201 /* FIXME assumes direct_8x8_inference == 1 */
1202 int y_shift = 2*!IS_INTERLACED(*mb_type);
1204 for(i8=0; i8<4; i8++){
1205 const int x8 = i8&1;
1206 const int y8 = i8>>1;
1208 const int16_t (*l1mv)[2]= l1mv0;
1210 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1212 h->sub_mb_type[i8] = sub_mb_type;
/* list1 ref of a temporal-direct block is always ref 0 */
1214 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1215 if(IS_INTRA(mb_type_col[y8])){
/* intra colocated block: zero refs and MVs */
1216 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1217 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1218 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 ref0 = l1ref0[x8 + y8*b8_stride];
1224 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1226 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1229 scale = dist_scale_factor[ref0];
1230 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1233 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
/* vertical component rescaled across frame/field geometry */
1234 int my_col = (mv_col[1]<<y_shift)/2;
1235 int mx = (scale * mv_col[0] + 128) >> 8;
1236 int my = (scale * my_col + 128) >> 8;
1237 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
/* list1 MV = list0 MV minus the colocated MV (spec-derived) */
1238 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 /* one-to-one mv scaling */
1246 if(IS_16X16(*mb_type)){
1249 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1250 if(IS_INTRA(mb_type_col[0])){
1253 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1254 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1255 const int scale = dist_scale_factor[ref0];
1256 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1258 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1259 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1261 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1262 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1264 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1265 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1266 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1268 for(i8=0; i8<4; i8++){
1269 const int x8 = i8&1;
1270 const int y8 = i8>>1;
1272 const int16_t (*l1mv)[2]= l1mv0;
1274 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1276 h->sub_mb_type[i8] = sub_mb_type;
1277 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1278 if(IS_INTRA(mb_type_col[0])){
1279 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1280 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1281 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1287 ref0 = map_col_to_list0[0][ref0];
1289 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1292 scale = dist_scale_factor[ref0];
1294 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1295 if(IS_SUB_8X8(sub_mb_type)){
1296 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1297 int mx = (scale * mv_col[0] + 128) >> 8;
1298 int my = (scale * mv_col[1] + 128) >> 8;
1299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1302 for(i4=0; i4<4; i4++){
1303 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1304 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1305 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1306 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1307 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1308 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-MB caches (mv_cache / mvd_cache / ref_cache / sub_mb_type)
 * back into the picture-wide arrays (motion_val, mvd_table, ref_index,
 * direct_table) after the macroblock has been decoded.
 * NOTE(review): loop headers and some branches are elided in this view. */
1315 static inline void write_back_motion(H264Context *h, int mb_type){
1316 MpegEncContext * const s = &h->s;
/* b_xy / b8_xy: this MB's origin in the 4x4 and 8x8 block grids */
1317 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1318 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1321 if(!USES_LIST(mb_type, 0))
/* mark list0 unused so the deblocking filter can rely on ref_index */
1322 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1324 for(list=0; list<h->list_count; list++){
1326 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (2x uint64) per row from the cache into motion_val */
1330 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1331 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1333 if( h->pps.cabac ) {
1334 if(IS_SKIP(mb_type))
/* skipped MBs have zero MV differences by definition */
1335 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1338 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1339 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1344 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1345 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1346 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1347 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1348 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B-slices track which 8x8 blocks were direct-coded */
1352 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1353 if(IS_8X8(mb_type)){
1354 uint8_t *direct_table = &h->direct_table[b8_xy];
1355 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1356 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1357 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1363 * Decodes a network abstraction layer unit.
1364 * @param consumed is the number of bytes used as input
1365 * @param length is the length of the array
1366 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1367 * @returns decoded bytes, might be src+1 if no escapes
/* Unescapes one NAL unit: parses the NAL header byte, then removes
 * 0x000003 emulation-prevention sequences into h->rbsp_buffer. Returns a
 * pointer to the unescaped payload (src+1 when no escapes were present).
 * NOTE(review): several loop headers/branches are elided in this view. */
1369 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1374 // src[0]&0x80; //forbidden bit
/* NAL header: nal_ref_idc (2 bits) and nal_unit_type (5 bits) */
1375 h->nal_ref_idc= src[0]>>5;
1376 h->nal_unit_type= src[0]&0x1F;
/* debug dump of the raw NAL bytes (guarded by elided DEBUG conditional) */
1380 for(i=0; i<length; i++)
1381 printf("%2X ", src[i]);
/* fast scan for a 0x00 byte using word-at-a-time "has zero byte" tricks */
1384 #if HAVE_FAST_UNALIGNED
1385 # if HAVE_FAST_64BIT
1387 for(i=0; i+1<length; i+=9){
1388 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1391 for(i=0; i+1<length; i+=5){
1392 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1395 if(i>0 && !src[i]) i--;
/* portable fallback: scan two bytes at a time for a zero */
1399 for(i=0; i+1<length; i+=2){
1400 if(src[i]) continue;
1401 if(i>0 && src[i-1]==0) i--;
1403 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1405 /* startcode, so we must be past the end */
1413 if(i>=length-1){ //no escaped 0
1414 *dst_length= length;
1415 *consumed= length+1; //+1 for the header
1419 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1420 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1421 dst= h->rbsp_buffer[bufidx];
1427 //printf("decoding esc\n");
/* copy the escape-free prefix verbatim */
1428 memcpy(dst, src, i);
1431 //remove escapes (very rare 1:2^22)
1433 dst[di++]= src[si++];
1434 dst[di++]= src[si++];
1435 }else if(src[si]==0 && src[si+1]==0){
1436 if(src[si+2]==3){ //escape
1441 }else //next start code
1445 dst[di++]= src[si++];
1448 dst[di++]= src[si++];
/* zero the padding so downstream bit readers never read garbage */
1451 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1454 *consumed= si + 1;//+1 for the header
1455 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1460 * identifies the exact end of the bitstream
1461 * @return the length of the trailing, or 0 if damaged
/* Finds the rbsp_stop_one_bit to locate the exact end of the bitstream.
 * NOTE(review): the body is almost entirely elided in this view; only the
 * signature and a trace line are visible. */
1463 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1467 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1477 * IDCT transforms the 16 dc values and dequantizes them.
1478 * @param qp quantization parameter
/* Inverse 4x4 Hadamard transform + dequant of the 16 luma DC coefficients
 * (Intra16x16 MBs). Two butterfly passes (rows into temp[], then columns),
 * writing results back to the DC positions of the 16 scattered 4x4 blocks.
 * NOTE(review): loop headers and intermediate butterfly lines are elided. */
1480 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1483 int temp[16]; //FIXME check if this is a good idea
/* offsets of the 4x4-block DC positions inside the scattered coeff layout */
1484 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1485 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1487 //memset(block, 64, 2*256);
/* pass 1: horizontal butterflies into temp[] */
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: vertical butterflies, then dequant with rounding */
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1510 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1511 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1512 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1518 * DCT transforms the 16 dc values.
1519 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side);
 * mirror of h264_luma_dc_dequant_idct_c() without the dequant step, with a
 * final >>1 normalization.
 * NOTE(review): loop headers and some intermediate lines are elided. */
1521 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1522 // const int qmul= dequant_coeff[qp][0];
1524 int temp[16]; //FIXME check if this is a good idea
/* same scattered DC layout as the inverse transform */
1525 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1526 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* pass 1: horizontal butterflies into temp[] */
1529 const int offset= y_offset[i];
1530 const int z0= block[offset+stride*0] + block[offset+stride*4];
1531 const int z1= block[offset+stride*0] - block[offset+stride*4];
1532 const int z2= block[offset+stride*1] - block[offset+stride*5];
1533 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* pass 2: vertical butterflies, halved on output */
1542 const int offset= x_offset[i];
1543 const int z0= temp[4*0+i] + temp[4*2+i];
1544 const int z1= temp[4*0+i] - temp[4*2+i];
1545 const int z2= temp[4*1+i] - temp[4*3+i];
1546 const int z3= temp[4*1+i] + temp[4*3+i];
1548 block[stride*0 +offset]= (z0 + z3)>>1;
1549 block[stride*2 +offset]= (z1 + z2)>>1;
1550 block[stride*8 +offset]= (z1 - z2)>>1;
1551 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequant of the 4 chroma DC coefficients.
 * NOTE(review): the intermediate butterfly assignments (computing e and
 * updating a..d) are elided in this view. */
1559 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* the 4 DCs live at the corners of a 2x2 grid inside the coeff array */
1560 const int stride= 16*2;
1561 const int xStride= 16;
1564 a= block[stride*0 + xStride*0];
1565 b= block[stride*0 + xStride*1];
1566 c= block[stride*1 + xStride*0];
1567 d= block[stride*1 + xStride*1];
/* butterfly outputs, dequantized with >>7 */
1574 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1575 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1576 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1577 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* 2x2 forward Hadamard transform of the 4 chroma DC values (encoder side);
 * mirror of chroma_dc_dequant_idct_c() without dequantization.
 * NOTE(review): the intermediate butterfly assignments are elided here. */
1581 static void chroma_dc_dct_c(DCTELEM *block){
1582 const int stride= 16*2;
1583 const int xStride= 16;
1586 a= block[stride*0 + xStride*0];
1587 b= block[stride*0 + xStride*1];
1588 c= block[stride*1 + xStride*0];
1589 d= block[stride*1 + xStride*1];
1596 block[stride*0 + xStride*0]= (a+c);
1597 block[stride*0 + xStride*1]= (e+b);
1598 block[stride*1 + xStride*0]= (a-c);
1599 block[stride*1 + xStride*1]= (e-b);
1604 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS lookup table;
 * t selects the Cb/Cr chroma_qp_index_offset variant. */
1606 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1607 return h->pps.chroma_qp_table[t][qscale];
/* Motion compensation for one partition in one direction (one list):
 * quarter-pel luma interpolation via qpix_op and eighth-pel chroma via
 * chroma_op, with edge emulation when the MV points outside the picture.
 * n identifies the partition (scan8 index), square/delta control the
 * second half of non-square partitions.
 * NOTE(review): some declarations and the emu branches are elided here. */
1610 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1611 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1612 int src_x_offset, int src_y_offset,
1613 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1614 MpegEncContext * const s = &h->s;
/* mx/my in quarter-pel units, including the block's position offset */
1615 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1616 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* fractional part selects one of 16 qpel interpolation functions */
1617 const int luma_xy= (mx&3) + ((my&3)<<2);
1618 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1619 uint8_t * src_cb, * src_cr;
1620 int extra_width= h->emu_edge_width;
1621 int extra_height= h->emu_edge_height;
1623 const int full_mx= mx>>2;
1624 const int full_my= my>>2;
1625 const int pic_width = 16*s->mb_width;
1626 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* interpolation taps reach 2 pixels beyond the block on fractional MVs */
1628 if(mx&7) extra_width -= 3;
1629 if(my&7) extra_height -= 3;
1631 if( full_mx < 0-extra_width
1632 || full_my < 0-extra_height
1633 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1634 || full_my + 16/*FIXME*/ > pic_height + extra_height){
/* MV points outside the padded picture: build an emulated edge block */
1635 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1636 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1640 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1642 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1645 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1648 // chroma offset when predicting from a field of opposite parity
1649 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1650 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1652 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1656 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1657 src_cb= s->edge_emu_buffer;
1659 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1662 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1663 src_cr= s->edge_emu_buffer;
1665 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation of one partition: list0 prediction with
 * the "put" functions, then (for bi-prediction) list1 averaged on top via
 * the "avg" functions selected between the two mc_dir_part() calls. */
1668 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1669 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1670 int x_offset, int y_offset,
1671 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1672 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1673 int list0, int list1){
1674 MpegEncContext * const s = &h->s;
1675 qpel_mc_func *qpix_op= qpix_put;
1676 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition; x/y_offset are chroma-scale */
1678 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1679 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1680 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* absolute position of the partition inside the picture */
1681 x_offset += 8*s->mb_x;
1682 y_offset += 8*(s->mb_y >> MB_FIELD);
1685 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1686 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1687 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1688 qpix_op, chroma_op);
/* second direction averages onto the first ("op" switched to avg) */
1691 chroma_op= chroma_avg;
1695 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1696 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1697 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1698 qpix_op, chroma_op);
/* Weighted motion compensation of one partition. Bi-directional case:
 * both predictions are rendered (list1 into a scratchpad) and merged with
 * the biweight functions — implicit (use_weight==2) or explicit weights.
 * Uni-directional case: a single prediction is scaled in place with the
 * explicit per-reference weight/offset.
 * NOTE(review): some conditionals around the branches are elided here. */
1702 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1703 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1704 int x_offset, int y_offset,
1705 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1706 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1707 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1708 int list0, int list1){
1709 MpegEncContext * const s = &h->s;
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1718 /* don't optimize for luma-only case, since B-frames usually
1719 * use implicit weights => chroma too. */
/* scratch area holds the list1 prediction until the biweight merge */
1720 uint8_t *tmp_cb = s->obmc_scratchpad;
1721 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1722 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1723 int refn0 = h->ref_cache[0][ scan8[n] ];
1724 int refn1 = h->ref_cache[1][ scan8[n] ];
1726 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1727 dest_y, dest_cb, dest_cr,
1728 x_offset, y_offset, qpix_put, chroma_put);
1729 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1730 tmp_y, tmp_cb, tmp_cr,
1731 x_offset, y_offset, qpix_put, chroma_put);
1733 if(h->use_weight == 2){
/* implicit weighting: table-derived weights summing to 64, denom 5 */
1734 int weight0 = h->implicit_weight[refn0][refn1];
1735 int weight1 = 64 - weight0;
1736 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1737 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-list, per-reference weights and offsets */
1740 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1741 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1742 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1743 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1744 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1745 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1746 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1748 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict then weight the single list in place */
1751 int list = list1 ? 1 : 0;
1752 int refn = h->ref_cache[list][ scan8[n] ];
1753 Picture *ref= &h->ref_list[list][refn];
1754 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1755 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1756 qpix_put, chroma_put);
1758 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1759 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1760 if(h->use_weight_chroma){
1761 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1762 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1763 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1764 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches one partition either to weighted MC (explicit weighting, or
 * implicit weighting when the implied weight differs from the trivial 32/32
 * average) or to the standard unweighted/averaging path. */
1769 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1770 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1771 int x_offset, int y_offset,
1772 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1773 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1774 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1775 int list0, int list1){
/* implicit weight 32 means a plain average: the std path is cheaper */
1776 if((h->use_weight==2 && list0 && list1
1777 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1778 || h->use_weight==1)
1779 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1780 x_offset, y_offset, qpix_put, chroma_put,
1781 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1783 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1784 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/* Prefetches reference pixels around the MV of the given list, roughly
 * 4 macroblocks ahead of the current decode position. */
1787 static inline void prefetch_motion(H264Context *h, int list){
1788 /* fetch pixels for estimated mv 4 macroblocks ahead
1789 * optimized for 64byte cache lines */
1790 MpegEncContext * const s = &h->s;
1791 const int refn = h->ref_cache[list][scan8[0]];
/* full-pel position of the MB's first MV (guard condition elided) */
1793 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1794 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1795 uint8_t **src= h->ref_list[list][refn].data;
1796 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1797 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are contiguous: one prefetch covers Cb and Cr */
1798 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1799 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Inter-prediction driver for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and issues mc_part()
 * with the matching block-size function tables and weight-table slots.
 * NOTE(review): some loop headers and the weighted-B branch are elided. */
1803 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1805 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1806 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1807 MpegEncContext * const s = &h->s;
1808 const int mb_xy= h->mb_xy;
1809 const int mb_type= s->current_picture.mb_type[mb_xy];
1811 assert(IS_INTER(mb_type));
/* prefetch list0 before doing the work, list1 afterwards (below) */
1813 prefetch_motion(h, 0);
1815 if(IS_16X16(mb_type)){
1816 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1817 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1818 &weight_op[0], &weight_avg[0],
1819 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1820 }else if(IS_16X8(mb_type)){
/* top and bottom 16x8 halves, each with its own direction flags */
1821 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1822 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1823 &weight_op[1], &weight_avg[1],
1824 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1825 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1826 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1827 &weight_op[1], &weight_avg[1],
1828 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1829 }else if(IS_8X16(mb_type)){
/* left and right 8x16 halves; delta is a line offset for the 2nd half */
1830 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1831 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1832 &weight_op[2], &weight_avg[2],
1833 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1834 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1835 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1836 &weight_op[2], &weight_avg[2],
1837 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1841 assert(IS_8X8(mb_type));
/* 8x8 mode: each of the 4 sub-blocks has its own sub_mb_type */
1844 const int sub_mb_type= h->sub_mb_type[i];
1846 int x_offset= (i&1)<<2;
1847 int y_offset= (i&2)<<1;
1849 if(IS_SUB_8X8(sub_mb_type)){
1850 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1851 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1852 &weight_op[3], &weight_avg[3],
1853 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1854 }else if(IS_SUB_8X4(sub_mb_type)){
1855 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1856 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1857 &weight_op[4], &weight_avg[4],
1858 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1859 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1860 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1861 &weight_op[4], &weight_avg[4],
1862 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1863 }else if(IS_SUB_4X8(sub_mb_type)){
1864 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1865 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1866 &weight_op[5], &weight_avg[5],
1867 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1868 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1869 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1870 &weight_op[5], &weight_avg[5],
1871 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1874 assert(IS_SUB_4X4(sub_mb_type));
/* 4x4: four independent blocks within the 8x8 sub-block */
1876 int sub_x_offset= x_offset + 2*(j&1);
1877 int sub_y_offset= y_offset + (j&2);
1878 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1879 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1880 &weight_op[6], &weight_avg[6],
1881 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887 prefetch_motion(h, 1);
/* One-time build of cavlc_level_tab: for each suffix length, a LUT mapping
 * LEVEL_TAB_BITS of bitstream to (decoded level, bits consumed). Codes too
 * long for the table store an escape marker (value >= 100) so the slow path
 * can take over.
 * NOTE(review): a couple of interior lines are elided in this view. */
1890 static av_cold void init_cavlc_level_tab(void){
1891 int suffix_length, mask;
1894 for(suffix_length=0; suffix_length<7; suffix_length++){
1895 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* leading-zero prefix length of this bit pattern */
1896 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1897 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* zigzag decode: even codes -> positive levels, odd -> negative */
1899 mask= -(level_code&1);
1900 level_code= (((2+level_code)>>1) ^ mask) - mask;
1901 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1902 cavlc_level_tab[suffix_length][i][0]= level_code;
1903 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1904 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* code longer than the table: flag an escape with prefix+100 */
1905 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1906 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1908 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1909 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables: coeff_token (4
 * variants packed into one array), total_zeros, run, and the chroma-DC
 * variants. All tables use preallocated static storage via
 * INIT_VLC_USE_NEW_STATIC, so no heap allocation happens here.
 * NOTE(review): the "done" guard logic and several loop headers are elided
 * from this view. */
1915 static av_cold void decode_init_vlc(void){
1916 static int done = 0;
1923 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1924 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1925 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1926 &chroma_dc_coeff_token_len [0], 1, 1,
1927 &chroma_dc_coeff_token_bits[0], 1, 1,
1928 INIT_VLC_USE_NEW_STATIC);
1932 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; // slice i's region of the packed table
1933 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1934 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1935 &coeff_token_len [i][0], 1, 1,
1936 &coeff_token_bits[i][0], 1, 1,
1937 INIT_VLC_USE_NEW_STATIC);
1938 offset += coeff_token_vlc_tables_size[i];
1941 * This is a one time safety check to make sure that
1942 * the packed static coeff_token_vlc table sizes
1943 * were initialized correctly.
1945 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1948 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1949 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1950 init_vlc(&chroma_dc_total_zeros_vlc[i],
1951 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952 &chroma_dc_total_zeros_len [i][0], 1, 1,
1953 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1954 INIT_VLC_USE_NEW_STATIC);
1956 for(i=0; i<15; i++){
1957 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1958 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1959 init_vlc(&total_zeros_vlc[i],
1960 TOTAL_ZEROS_VLC_BITS, 16,
1961 &total_zeros_len [i][0], 1, 1,
1962 &total_zeros_bits[i][0], 1, 1,
1963 INIT_VLC_USE_NEW_STATIC);
1967 run_vlc[i].table = run_vlc_tables[i];
1968 run_vlc[i].table_allocated = run_vlc_tables_size;
1969 init_vlc(&run_vlc[i],
1971 &run_len [i][0], 1, 1,
1972 &run_bits[i][0], 1, 1,
1973 INIT_VLC_USE_NEW_STATIC);
1975 run7_vlc.table = run7_vlc_table, // note: comma operator joins this with the next assignment
1976 run7_vlc.table_allocated = run7_vlc_table_size;
1977 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1978 &run_len [6][0], 1, 1,
1979 &run_bits[6][0], 1, 1,
1980 INIT_VLC_USE_NEW_STATIC);
1982 init_cavlc_level_tab();
/* Free all per-context tables allocated by alloc_tables()/context_init(),
 * including the per-thread top border buffers and scratchpads.
 * av_freep() NULLs the pointers, so this is safe to call on a partially
 * allocated context. */
1986 static void free_tables(H264Context *h){
1989 av_freep(&h->intra4x4_pred_mode);
1990 av_freep(&h->chroma_pred_mode_table);
1991 av_freep(&h->cbp_table);
1992 av_freep(&h->mvd_table[0]);
1993 av_freep(&h->mvd_table[1]);
1994 av_freep(&h->direct_table);
1995 av_freep(&h->non_zero_count);
1996 av_freep(&h->slice_table_base);
1997 h->slice_table= NULL; // derived pointer into slice_table_base; must not be freed itself
1999 av_freep(&h->mb2b_xy);
2000 av_freep(&h->mb2b8_xy);
2002 for(i = 0; i < h->s.avctx->thread_count; i++) {
2003 hx = h->thread_context[i];
2005 av_freep(&hx->top_borders[1]); /* NOTE(review): a NULL check on hx is likely elided between these lines */
2006 av_freep(&hx->top_borders[0]);
2007 av_freep(&hx->s.obmc_scratchpad);
/* Build the 8x8 dequantization tables for all 52 QP values from the PPS
 * scaling matrices. If both scaling matrices are identical, share one
 * buffer instead of computing twice. The table is stored transposed when
 * the platform IDCT uses a permuted coefficient order. */
2011 static void init_dequant8_coeff_table(H264Context *h){
2013 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2014 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2015 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2017 for(i=0; i<2; i++ ){
2018 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2019 h->dequant8_coeff[1] = h->dequant8_buffer[0]; // matrices equal: alias buffer 0, skip recompute
2023 for(q=0; q<52; q++){
2024 int shift = div6[q]; // qp/6 scaling exponent
2027 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2028 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2029 h->pps.scaling_matrix8[i][x]) << shift;
/* Build the 4x4 dequantization tables for all 52 QP values from the six PPS
 * scaling matrices. Matrices that repeat an earlier one alias that buffer
 * instead of being recomputed. Stored transposed when the platform IDCT
 * uses a permuted coefficient order. */
2034 static void init_dequant4_coeff_table(H264Context *h){
2036 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2037 for(i=0; i<6; i++ ){
2038 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2040 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2041 h->dequant4_coeff[i] = h->dequant4_buffer[j]; // duplicate matrix: share buffer j
2048 for(q=0; q<52; q++){
2049 int shift = div6[q] + 2; // qp/6 exponent, +2 for the 4x4 norm
2052 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2053 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2054 h->pps.scaling_matrix4[i][x]) << shift;
/* (Re)build all dequant tables for the current PPS/SPS: always the 4x4
 * tables, the 8x8 tables only when 8x8 transform is enabled, and for
 * lossless (transform_bypass) force qp==0 entries to the identity scale. */
2059 static void init_dequant_tables(H264Context *h){
2061 init_dequant4_coeff_table(h);
2062 if(h->pps.transform_8x8_mode)
2063 init_dequant8_coeff_table(h);
2064 if(h->sps.transform_bypass){
2067 h->dequant4_coeff[i][0][x] = 1<<6; // identity (unit scale at the table's fixed-point precision)
2068 if(h->pps.transform_8x8_mode)
2071 h->dequant8_coeff[i][0][x] = 1<<6;
2078 * needs width/height
/* Allocate all per-picture-geometry tables (prediction modes, nnz, slice
 * table, cbp, mvd, direct, mb->block coordinate maps). Requires
 * mb_width/mb_height to be known. Returns 0 on success; CHECKED_ALLOCZ
 * jumps to the (elided) fail path on OOM. */
2080 static int alloc_tables(H264Context *h){
2081 MpegEncContext * const s = &h->s;
2082 const int big_mb_num= s->mb_stride * (s->mb_height+1); // one extra row for edge handling
2085 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2087 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2088 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2089 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2091 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2092 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2093 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2094 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2096 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2097 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; // offset so out-of-frame neighbors read the -1 border
2099 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2100 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2101 for(y=0; y<s->mb_height; y++){
2102 for(x=0; x<s->mb_width; x++){
2103 const int mb_xy= x + y*s->mb_stride;
2104 const int b_xy = 4*x + 4*y*h->b_stride; // 4x4-block coords of this MB's top-left
2105 const int b8_xy= 2*x + 2*y*h->b8_stride; // 8x8-block coords of this MB's top-left
2107 h->mb2b_xy [mb_xy]= b_xy;
2108 h->mb2b8_xy[mb_xy]= b8_xy;
2112 s->obmc_scratchpad = NULL; // allocated lazily in frame_start() once linesize is known
2114 if(!h->dequant4_coeff[0])
2115 init_dequant_tables(h);
2124 * Mimic alloc_tables(), but for every context thread.
/* Share the tables allocated by alloc_tables() with a per-thread context:
 * all pointers alias the source context's allocations (no copies), so only
 * the owning context may free them. The scratchpad stays per-thread. */
2126 static void clone_tables(H264Context *dst, H264Context *src){
2127 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2128 dst->non_zero_count = src->non_zero_count;
2129 dst->slice_table = src->slice_table;
2130 dst->cbp_table = src->cbp_table;
2131 dst->mb2b_xy = src->mb2b_xy;
2132 dst->mb2b8_xy = src->mb2b8_xy;
2133 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2134 dst->mvd_table[0] = src->mvd_table[0];
2135 dst->mvd_table[1] = src->mvd_table[1];
2136 dst->direct_table = src->direct_table;
2138 dst->s.obmc_scratchpad = NULL; // per-thread; allocated lazily in frame_start()
2139 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2144 * Allocate buffers which are not shared amongst multiple threads.
/* Allocate the per-thread top-border buffers (16 luma + 8+8 chroma bytes
 * per macroblock column). Returns 0 on success, -1 on OOM (allocations
 * already made are released later by free_tables()). */
2146 static int context_init(H264Context *h){
2147 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2152 return -1; // free_tables will clean up for us
/* Initialization shared by all entry points: copy geometry from the
 * AVCodecContext, set up prediction functions and DSP (early, so the IDCT
 * permutation is known for the dequant tables), and default the scaling
 * matrices to flat (all 16). */
2155 static av_cold void common_init(H264Context *h){
2156 MpegEncContext * const s = &h->s;
2158 s->width = s->avctx->width;
2159 s->height = s->avctx->height;
2160 s->codec_id= s->avctx->codec->id;
2162 ff_h264_pred_init(&h->hpc, s->codec_id);
2164 h->dequant_coeff_pps= -1; // force dequant table rebuild on first PPS
2165 s->unrestricted_mv=1;
2166 s->decode=1; //FIXME
2168 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2170 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2171 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, pick the output
 * pixel format (SVQ3 / VDPAU / plain YUV420P), detect avcC-style ("AVC1")
 * extradata, and reset the POC/SEI bookkeeping fields. */
2174 static av_cold int decode_init(AVCodecContext *avctx){
2175 H264Context *h= avctx->priv_data;
2176 MpegEncContext * const s = &h->s;
2178 MPV_decode_defaults(s);
2183 s->out_format = FMT_H264;
2184 s->workaround_bugs= avctx->workaround_bugs;
2187 // s->decode_mb= ff_h263_decode_mb;
2188 s->quarter_sample = 1;
2191 if(avctx->codec_id == CODEC_ID_SVQ3)
2192 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2193 else if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2194 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2196 avctx->pix_fmt= PIX_FMT_YUV420P;
2200 if(avctx->extradata_size > 0 && avctx->extradata &&
2201 *(char *)avctx->extradata == 1){ // leading byte 1 => avcC (length-prefixed NALs); handling elided here
2208 h->thread_context[0] = h;
2209 h->outputed_poc = INT_MIN;
2210 h->prev_poc_msb= 1<<16;
2211 h->sei_recovery_frame_cnt = -1;
2212 h->sei_dpb_output_delay = 0;
2213 h->sei_cpb_removal_delay = -1;
2214 h->sei_buffering_period_present = 0;
/* Per-frame setup: start the MPV frame and error resilience, compute the
 * block_offset tables (frame and field variants), lazily allocate the
 * per-thread bipred scratchpads (needs linesize), reset the slice table for
 * MBAFF/multithread, and initialize key_frame/reference/POC state. */
2218 static int frame_start(H264Context *h){
2219 MpegEncContext * const s = &h->s;
2222 if(MPV_frame_start(s, s->avctx) < 0)
2224 ff_er_frame_start(s);
2226 * MPV_frame_start uses pict_type to derive key_frame.
2227 * This is incorrect for H.264; IDR markings must be used.
2228 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2229 * See decode_nal_units().
2231 s->current_picture_ptr->key_frame= 0;
2233 assert(s->linesize && s->uvlinesize);
2235 for(i=0; i<16; i++){
2236 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); // frame-mode luma offsets
2237 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); // field-mode luma offsets (double row stride)
2240 h->block_offset[16+i]=
2241 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[24+16+i]=
2243 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2246 /* can't be in alloc_tables because linesize isn't known there.
2247 * FIXME: redo bipred weight to not require extra buffer? */
2248 for(i = 0; i < s->avctx->thread_count; i++)
2249 if(!h->thread_context[i]->s.obmc_scratchpad)
2250 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2252 /* some macroblocks will be accessed before they're available */
2253 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2254 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2256 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2258 // We mark the current picture as non-reference after allocating it, so
2259 // that if we break out due to an error it can be released automatically
2260 // in the next MPV_frame_start().
2261 // SVQ3 as well as most other codecs have only last/next/current and thus
2262 // get released even with set reference, besides SVQ3 and others do not
2263 // mark frames as reference later "naturally".
2264 if(s->codec_id != CODEC_ID_SVQ3)
2265 s->current_picture_ptr->reference= 0;
2267 s->current_picture_ptr->field_poc[0]=
2268 s->current_picture_ptr->field_poc[1]= INT_MAX;
2269 assert(s->current_picture_ptr->long_ref==0);
/* Save the right/bottom border pixels of the just-decoded macroblock into
 * h->left_border / h->top_borders so the deblocking filter (and intra
 * prediction of neighbors) can still read pre-filter samples. The MBAFF
 * path keeps two saved lines (above top and above bottom MB of a pair) and
 * uses offset/step to interleave field rows. */
2274 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2275 MpegEncContext * const s = &h->s;
2284 src_cb -= uvlinesize;
2285 src_cr -= uvlinesize;
2287 if(!simple && FRAME_MBAFF){
2289 offset = MB_MBAFF ? 1 : 17;
2290 uvoffset= MB_MBAFF ? 1 : 9;
2292 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2293 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2294 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2301 h->left_border[0]= h->top_borders[0][s->mb_x][15]; // top-left corner sample
2302 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2303 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2304 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2310 top_idx = MB_MBAFF ? 0 : 1;
2312 step= MB_MBAFF ? 2 : 1; // interleave rows of the field pair
2315 // There are two lines saved, the line above the the top macroblock of a pair,
2316 // and the line above the bottom macroblock
2317 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2318 for(i=1; i<17 - skiplast; i++){
2319 h->left_border[offset+i*step]= src_y[15+i* linesize]; // rightmost luma column
2322 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2323 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2325 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2326 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2327 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2328 for(i=1; i<9 - skiplast; i++){
2329 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2330 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2332 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2333 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved border samples with the
 * picture edges around the current macroblock. Intra prediction needs
 * unfiltered neighbor samples; this temporarily puts them back in place
 * before prediction and restores the filtered ones afterwards. The
 * deblock_left/top checks (slice-boundary aware when deblocking_filter==2)
 * decide which edges actually need swapping. */
2337 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2338 MpegEncContext * const s = &h->s;
2349 if(!simple && FRAME_MBAFF){
2351 offset = MB_MBAFF ? 1 : 17;
2352 uvoffset= MB_MBAFF ? 1 : 9;
2356 top_idx = MB_MBAFF ? 0 : 1;
2358 step= MB_MBAFF ? 2 : 1;
2361 if(h->deblocking_filter == 2) {
2363 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1]; // only within the same slice
2364 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2366 deblock_left = (s->mb_x > 0);
2367 deblock_top = (s->mb_y > !!MB_FIELD);
2370 src_y -= linesize + 1; // step back to include the top-left corner sample
2371 src_cb -= uvlinesize + 1;
2372 src_cr -= uvlinesize + 1;
2374 #define XCHG(a,b,t,xchg)\
2381 for(i = !deblock_top; i<16; i++){
2382 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2384 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2388 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2389 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2390 if(s->mb_x+1 < s->mb_width){
2391 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); // top-right neighbor samples
2395 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2397 for(i = !deblock_top; i<8; i++){
2398 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2399 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2401 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2402 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2405 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2406 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Core macroblock reconstruction. With simple=1 the compiler specializes
 * away the uncommon paths (MBAFF, PCM, gray, SVQ3); the caller picks the
 * variant. Steps: compute destination pointers, handle field/MBAFF
 * linesizes, then either copy PCM samples, run intra prediction + IDCT, or
 * run motion compensation; add residuals for luma and chroma; finally
 * back up borders and run the in-loop deblocking filter. */
2411 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2412 MpegEncContext * const s = &h->s;
2413 const int mb_x= s->mb_x;
2414 const int mb_y= s->mb_y;
2415 const int mb_xy= h->mb_xy;
2416 const int mb_type= s->current_picture.mb_type[mb_xy];
2417 uint8_t *dest_y, *dest_cb, *dest_cr;
2418 int linesize, uvlinesize /*dct_offset*/;
2420 int *block_offset = &h->block_offset[0];
2421 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2422 /* is_h264 should always be true if SVQ3 is disabled. */
2423 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2424 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2425 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2427 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2428 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2429 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2431 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2432 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2434 if (!simple && MB_FIELD) {
2435 linesize = h->mb_linesize = s->linesize * 2; // field macroblock: rows of one field only
2436 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2437 block_offset = &h->block_offset[24]; // field-mode offset table (set up in frame_start)
2438 if(mb_y&1){ //FIXME move out of this function?
2439 dest_y -= s->linesize*15;
2440 dest_cb-= s->uvlinesize*7;
2441 dest_cr-= s->uvlinesize*7;
2445 for(list=0; list<h->list_count; list++){
2446 if(!USES_LIST(mb_type, list))
2448 if(IS_16X16(mb_type)){
2449 int8_t *ref = &h->ref_cache[list][scan8[0]];
2450 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1); // remap refs to field parity
2452 for(i=0; i<16; i+=4){
2453 int ref = h->ref_cache[list][scan8[i]];
2455 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2461 linesize = h->mb_linesize = s->linesize;
2462 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2463 // dct_offset = s->linesize * 16;
2466 if (!simple && IS_INTRA_PCM(mb_type)) {
2467 for (i=0; i<16; i++) { // raw PCM samples: copy straight from h->mb
2468 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2470 for (i=0; i<8; i++) {
2471 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2472 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2475 if(IS_INTRA(mb_type)){
2476 if(h->deblocking_filter)
2477 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); // swap in unfiltered neighbor samples
2479 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2480 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2481 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2484 if(IS_INTRA4x4(mb_type)){
2485 if(simple || !s->encoding){
2486 if(IS_8x8DCT(mb_type)){
2487 if(transform_bypass){
2489 idct_add = s->dsp.add_pixels8;
2491 idct_dc_add = s->dsp.h264_idct8_dc_add;
2492 idct_add = s->dsp.h264_idct8_add;
2494 for(i=0; i<16; i+=4){
2495 uint8_t * const ptr= dest_y + block_offset[i];
2496 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2497 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2498 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); // lossless: combined predict+add
2500 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2501 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2502 (h->topright_samples_available<<i)&0x4000, linesize);
2504 if(nnz == 1 && h->mb[i*16])
2505 idct_dc_add(ptr, h->mb + i*16, linesize); // DC-only shortcut
2507 idct_add (ptr, h->mb + i*16, linesize);
2512 if(transform_bypass){
2514 idct_add = s->dsp.add_pixels4;
2516 idct_dc_add = s->dsp.h264_idct_dc_add;
2517 idct_add = s->dsp.h264_idct_add;
2519 for(i=0; i<16; i++){
2520 uint8_t * const ptr= dest_y + block_offset[i];
2521 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2523 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2524 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2528 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2529 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2530 assert(mb_y || linesize <= block_offset[i]);
2531 if(!topright_avail){
2532 tr= ptr[3 - linesize]*0x01010101; // replicate last available top sample
2533 topright= (uint8_t*) &tr;
2535 topright= ptr + 4 - linesize;
2539 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2540 nnz = h->non_zero_count_cache[ scan8[i] ];
2543 if(nnz == 1 && h->mb[i*16])
2544 idct_dc_add(ptr, h->mb + i*16, linesize);
2546 idct_add (ptr, h->mb + i*16, linesize);
2548 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); // SVQ3 path
2555 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2557 if(!transform_bypass)
2558 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2560 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2562 if(h->deblocking_filter)
2563 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); // restore filtered samples
2565 hl_motion(h, dest_y, dest_cb, dest_cr,
2566 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2567 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2568 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2572 if(!IS_INTRA4x4(mb_type)){
2574 if(IS_INTRA16x16(mb_type)){
2575 if(transform_bypass){
2576 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2577 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2579 for(i=0; i<16; i++){
2580 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2581 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2585 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2587 }else if(h->cbp&15){ // any luma residual present
2588 if(transform_bypass){
2589 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2590 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2591 for(i=0; i<16; i+=di){
2592 if(h->non_zero_count_cache[ scan8[i] ]){
2593 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2597 if(IS_8x8DCT(mb_type)){
2598 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2600 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2605 for(i=0; i<16; i++){
2606 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2607 uint8_t * const ptr= dest_y + block_offset[i];
2608 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2614 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){ // chroma residual present
2615 uint8_t *dest[2] = {dest_cb, dest_cr};
2616 if(transform_bypass){
2617 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2618 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2619 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2621 idct_add = s->dsp.add_pixels4;
2622 for(i=16; i<16+8; i++){
2623 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2624 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2628 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2629 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2631 idct_add = s->dsp.h264_idct_add;
2632 idct_dc_add = s->dsp.h264_idct_dc_add;
2633 for(i=16; i<16+8; i++){
2634 if(h->non_zero_count_cache[ scan8[i] ])
2635 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2636 else if(h->mb[i*16])
2637 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2640 for(i=16; i<16+8; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2642 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2643 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2650 if(h->cbp || IS_INTRA(mb_type))
2651 s->dsp.clear_blocks(h->mb); // reset residual buffer for the next MB
2653 if(h->deblocking_filter) {
2654 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2655 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2656 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2657 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2658 if (!simple && FRAME_MBAFF) {
2659 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2661 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2667 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Fast-path macroblock reconstruction: specialization of
 * hl_decode_mb_internal() with simple=1 (no MBAFF/PCM/gray/SVQ3 checks). */
2669 static void hl_decode_mb_simple(H264Context *h){
2670 hl_decode_mb_internal(h, 1);
2674 * Process a macroblock; this handles edge cases, such as interlacing.
/* Full-path macroblock reconstruction: handles the uncommon cases
 * (interlacing/MBAFF, PCM, etc.); kept out-of-line to protect the fast path. */
2676 static void av_noinline hl_decode_mb_complex(H264Context *h){
2677 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex variant.
 * The complex path is forced for CONFIG_SMALL builds, contexts flagged
 * is_complex, PCM macroblocks, and lossless (qscale==0) macroblocks. */
2680 static void hl_decode_mb(H264Context *h){
2681 MpegEncContext * const s = &h->s;
2682 const int mb_xy= h->mb_xy;
2683 const int mb_type= s->current_picture.mb_type[mb_xy];
2684 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2687 hl_decode_mb_complex(h);
2688 else hl_decode_mb_simple(h);
/* Convert a frame Picture into a single-field view in place: offset the
 * data pointers to the requested field's rows, double the linesizes, set
 * the reference flag to the field parity, and pick the matching field POC. */
2691 static void pic_as_field(Picture *pic, const int parity){
2693 for (i = 0; i < 4; ++i) {
2694 if (parity == PICT_BOTTOM_FIELD)
2695 pic->data[i] += pic->linesize[i]; // skip to the bottom field's first row
2696 pic->reference = parity;
2697 pic->linesize[i] *= 2; // step over the other field's rows
2699 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy src into dest if src is a reference with the requested parity;
 * when a single field is requested, rewrite dest as that field and adjust
 * its pic_id by id_add. Returns nonzero iff the copy happened.
 * NOTE(review): the actual *dest = *src copy line is elided in this view. */
2702 static int split_field_copy(Picture *dest, Picture *src,
2703 int parity, int id_add){
2704 int match = !!(src->reference & parity);
2708 if(parity != PICT_FRAME){
2709 pic_as_field(dest, parity);
2711 dest->pic_id += id_add;
/* Build a default reference list segment from 'in': interleave pictures
 * whose reference parity matches sel with those matching the opposite
 * parity (sel^3), assigning pic_id from the long-term index or frame_num.
 * Returns the number of entries written (return line elided in this view).
 * i[0]/i[1] scan for same- and opposite-parity candidates respectively. */
2718 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2722 while(i[0]<len || i[1]<len){
2723 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2725 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2728 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2729 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2732 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2733 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/* Selection-sort src pictures by POC into 'sorted': dir=0 picks POCs above
 * 'limit' in descending closeness (largest first below... per the XOR'd
 * comparisons), dir=1 the opposite direction. Returns the count appended
 * (return line elided in this view). */
2740 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2745 best_poc= dir ? INT_MIN : INT_MAX;
2747 for(i=0; i<len; i++){
2748 const int poc= src[i]->poc;
2749 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){ // candidate on the right side of limit and best so far
2751 sorted[out_i]= src[i];
2754 if(best_poc == (dir ? INT_MIN : INT_MAX)) // no candidate found this pass: done
2756 limit= sorted[out_i++]->poc - dir; // tighten limit so the next pass picks the next POC
2762 * fills the default_ref_list.
/* Construct the default reference picture lists. For B slices, short-term
 * refs are sorted around the current POC (list0: past then future, list1:
 * future then past), long-term refs appended, and list1 gets its first two
 * entries swapped if it would equal list0. For P slices a single list is
 * built from short-term then long-term refs. */
2764 static int fill_default_ref_list(H264Context *h){
2765 MpegEncContext * const s = &h->s;
2768 if(h->slice_type_nos==FF_B_TYPE){
2769 Picture *sorted[32];
2774 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2776 cur_poc= s->current_picture_ptr->poc;
2778 for(list= 0; list<2; list++){
2779 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2780 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2782 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2783 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2786 if(len < h->ref_count[list])
2787 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2791 if(lens[0] == lens[1] && lens[1] > 1){
2792 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2794 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]); // lists identical: spec says swap first two of list1
2797 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2798 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2800 if(len < h->ref_count[0])
2801 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2804 for (i=0; i<h->ref_count[0]; i++) {
2805 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2807 if(h->slice_type_nos==FF_B_TYPE){
2808 for (i=0; i<h->ref_count[1]; i++) {
2809 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2816 static void print_short_term(H264Context *h);
2817 static void print_long_term(H264Context *h);
2820 * Extract structure information about the picture described by pic_num in
2821 * the current decoding context (frame or field). Note that pic_num is
2822 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2823 * @param pic_num picture number for which to extract structure information
2824 * @param structure one of PICT_XXX describing structure of picture
2826 * @return frame number (short term) or long term index of picture
2827 * described by pic_num
/* Split a pic_num into its picture-structure component and its frame number
 * / long-term index. In field decoding the low bit of pic_num selects same
 * vs. opposite field parity relative to the current picture.
 * NOTE(review): the field-mode bit-shift of pic_num is elided in this view. */
2829 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2830 MpegEncContext * const s = &h->s;
2832 *structure = s->picture_structure;
2835 /* opposite field */
2836 *structure ^= PICT_FRAME;
/* Parse the ref_pic_list_reordering syntax (H.264 spec 7.3.3.1 / 8.2.4.3):
 * start each list from the default list, then, if reordering is signalled,
 * move the referenced short-term (idc 0/1) or long-term (idc 2) picture to
 * the current index, shifting the rest down. Finally verify every entry is
 * a valid picture, substituting the current picture on error. Returns 0 on
 * success, -1 on bitstream errors. */
2843 static int decode_ref_pic_list_reordering(H264Context *h){
2844 MpegEncContext * const s = &h->s;
2845 int list, index, pic_structure;
2847 print_short_term(h);
2850 for(list=0; list<h->list_count; list++){
2851 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2853 if(get_bits1(&s->gb)){ // ref_pic_list_reordering_flag
2854 int pred= h->curr_pic_num;
2856 for(index=0; ; index++){
2857 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2858 unsigned int pic_id;
2860 Picture *ref = NULL;
2862 if(reordering_of_pic_nums_idc==3) // end of reordering commands
2865 if(index >= h->ref_count[list]){
2866 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2870 if(reordering_of_pic_nums_idc<3){
2871 if(reordering_of_pic_nums_idc<2){ // short-term: abs_diff_pic_num
2872 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2875 if(abs_diff_pic_num > h->max_pic_num){
2876 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2880 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2881 else pred+= abs_diff_pic_num;
2882 pred &= h->max_pic_num - 1; // modular wrap of picNum
2884 frame_num = pic_num_extract(h, pred, &pic_structure);
2886 for(i= h->short_ref_count-1; i>=0; i--){
2887 ref = h->short_ref[i];
2888 assert(ref->reference);
2889 assert(!ref->long_ref);
2891 ref->frame_num == frame_num &&
2892 (ref->reference & pic_structure)
2900 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2902 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2905 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2908 ref = h->long_ref[long_idx];
2909 assert(!(ref && !ref->reference));
2910 if(ref && (ref->reference & pic_structure)){
2911 ref->pic_id= pic_id;
2912 assert(ref->long_ref);
2920 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2921 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2923 for(i=index; i+1<h->ref_count[list]; i++){ // find existing occurrence of ref in the list
2924 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2927 for(; i > index; i--){ // shift entries down to make room at 'index'
2928 h->ref_list[list][i]= h->ref_list[list][i-1];
2930 h->ref_list[list][index]= *ref;
2932 pic_as_field(&h->ref_list[list][index], pic_structure);
2936 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2942 for(list=0; list<h->list_count; list++){
2943 for(index= 0; index < h->ref_count[list]; index++){
2944 if(!h->ref_list[list][index].data[0]){
2945 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2946 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* For MBAFF decoding, derive per-field reference entries from each frame
 * reference: entries [16+2*i] (top field) and [16+2*i+1] (bottom field)
 * mirror frame entry [i], with doubled linesizes and offset data pointers,
 * and duplicate the corresponding weighted-prediction parameters. */
2954 static void fill_mbaff_ref_list(H264Context *h){
2956 for(list=0; list<2; list++){ //FIXME try list_count
2957 for(i=0; i<h->ref_count[list]; i++){
2958 Picture *frame = &h->ref_list[list][i];
2959 Picture *field = &h->ref_list[list][16+2*i];
2962 field[0].linesize[j] <<= 1; // field view: skip alternate rows
2963 field[0].reference = PICT_TOP_FIELD;
2964 field[0].poc= field[0].field_poc[0];
2965 field[1] = field[0];
2967 field[1].data[j] += frame->linesize[j]; // bottom field starts one frame-row down
2968 field[1].reference = PICT_BOTTOM_FIELD;
2969 field[1].poc= field[1].field_poc[1];
2971 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2972 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2974 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2975 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2979 for(j=0; j<h->ref_count[1]; j++){
2980 for(i=0; i<h->ref_count[0]; i++)
2981 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2982 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2983 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the pred_weight_table slice-header syntax (H.264 spec 7.3.3.2):
 * read the luma/chroma log2 weight denominators, then per reference and
 * per list the explicit weights and offsets, falling back to the defaults
 * (1<<denom weight, 0 offset) when the per-ref flag is absent. Sets
 * use_weight/use_weight_chroma when any non-default value appears. */
2987 static int pred_weight_table(H264Context *h){
2988 MpegEncContext * const s = &h->s;
2990 int luma_def, chroma_def;
2993 h->use_weight_chroma= 0;
2994 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2995 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2996 luma_def = 1<<h->luma_log2_weight_denom; // default weight = identity at the chosen denominator
2997 chroma_def = 1<<h->chroma_log2_weight_denom;
2999 for(list=0; list<2; list++){
3000 h->luma_weight_flag[list] = 0;
3001 h->chroma_weight_flag[list] = 0;
3002 for(i=0; i<h->ref_count[list]; i++){
3003 int luma_weight_flag, chroma_weight_flag;
3005 luma_weight_flag= get_bits1(&s->gb);
3006 if(luma_weight_flag){
3007 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3008 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3009 if( h->luma_weight[list][i] != luma_def
3010 || h->luma_offset[list][i] != 0) {
3012 h->luma_weight_flag[list]= 1;
3015 h->luma_weight[list][i]= luma_def;
3016 h->luma_offset[list][i]= 0;
3020 chroma_weight_flag= get_bits1(&s->gb);
3021 if(chroma_weight_flag){
3024 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3025 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3026 if( h->chroma_weight[list][i][j] != chroma_def
3027 || h->chroma_offset[list][i][j] != 0) {
3028 h->use_weight_chroma= 1;
3029 h->chroma_weight_flag[list]= 1;
3035 h->chroma_weight[list][i][j]= chroma_def;
3036 h->chroma_offset[list][i][j]= 0;
3041 if(h->slice_type_nos != FF_B_TYPE) break; // P slices have only list 0
3043 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derive implicit bi-prediction weights for B slices from picture order
 * counts (POC distances), per H.264 weighted_bipred_idc == 2.
 * NOTE(review): extract is incomplete (some branches/braces missing);
 * comments cover only visible code.
 */
3047 static void implicit_weight_table(H264Context *h){
3048 MpegEncContext * const s = &h->s;
3050 int cur_poc = s->current_picture_ptr->poc;
3052 for (i = 0; i < 2; i++) {
3053 h->luma_weight_flag[i] = 0;
3054 h->chroma_weight_flag[i] = 0;
// special case: single ref each side, equidistant in POC -> no weighting
3057 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3058 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3060 h->use_weight_chroma= 0;
3065 h->use_weight_chroma= 2;
3066 h->luma_log2_weight_denom= 5;
3067 h->chroma_log2_weight_denom= 5;
3069 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3070 int poc0 = h->ref_list[0][ref0].poc;
3071 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3072 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/DistScaleFactor derivation as in the spec (8.4.2.3.1-style math)
3073 int td = av_clip(poc1 - poc0, -128, 127);
3075 int tb = av_clip(cur_poc - poc0, -128, 127);
3076 int tx = (16384 + (FFABS(td) >> 1)) / td;
3077 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factors fall back to the equal weight 32
3078 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3079 h->implicit_weight[ref0][ref1] = 32;
3081 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3083 h->implicit_weight[ref0][ref1] = 32;
3089 * Mark a picture as no longer needed for reference. The refmask
3090 * argument allows unreferencing of individual fields or the whole frame.
3091 * If the picture becomes entirely unreferenced, but is being held for
3092 * display purposes, it is marked as such.
3093 * @param refmask mask of fields to unreference; the mask is bitwise
3094 * anded with the reference marking of pic
3095 * @return non-zero if pic becomes entirely unreferenced (except possibly
3096 * for display purposes), zero if one of the fields remains in
3099 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// clear the field bits not in refmask; non-zero result -> still referenced
3101 if (pic->reference &= refmask) {
// fully unreferenced: if still queued for output, keep it alive with
// the special DELAYED_PIC_REF marker so it is not recycled early
3104 for(i = 0; h->delayed_pic[i]; i++)
3105 if(pic == h->delayed_pic[i]){
3106 pic->reference=DELAYED_PIC_REF;
3114 * instantaneous decoder refresh.
/**
 * Perform IDR housekeeping: drop all long- and short-term references and
 * reset frame number tracking, as required at an instantaneous decoder
 * refresh point.
 */
3116 static void idr(H264Context *h){
3119 for(i=0; i<16; i++){
3120 remove_long(h, i, 0);
3122 assert(h->long_ref_count==0);
// fully unreference every short-term picture and empty the list
3124 for(i=0; i<h->short_ref_count; i++){
3125 unreference_pic(h, h->short_ref[i], 0);
3126 h->short_ref[i]= NULL;
3128 h->short_ref_count=0;
3129 h->prev_frame_num= 0;
3130 h->prev_frame_num_offset= 0;
3135 /* forget old pics after a seek */
/* forget old pics after a seek: drop delayed-output pictures, clear the
 * current picture's reference marking, and reset SEI-derived state before
 * delegating the generic buffer flush to ff_mpeg_flush(). */
3136 static void flush_dpb(AVCodecContext *avctx){
3137 H264Context *h= avctx->priv_data;
3139 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3140 if(h->delayed_pic[i])
3141 h->delayed_pic[i]->reference= 0;
3142 h->delayed_pic[i]= NULL;
// INT_MIN == "no picture output yet"
3144 h->outputed_poc= INT_MIN;
3146 if(h->s.current_picture_ptr)
3147 h->s.current_picture_ptr->reference= 0;
3148 h->s.first_field= 0;
// reset SEI state so stale timing/recovery info does not survive the seek
3149 h->sei_recovery_frame_cnt = -1;
3150 h->sei_dpb_output_delay = 0;
3151 h->sei_cpb_removal_delay = -1;
3152 h->sei_buffering_period_present = 0;
3153 ff_mpeg_flush(avctx);
3157 * Find a Picture in the short term reference list by frame number.
3158 * @param frame_num frame number to search for
3159 * @param idx the index into h->short_ref where returned picture is found
3160 * undefined if no picture found.
3161 * @return pointer to the found picture, or NULL if no pic with the provided
3162 * frame number is found
3164 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3165 MpegEncContext * const s = &h->s;
// linear scan of the short-term list; it holds at most 16 entries
3168 for(i=0; i<h->short_ref_count; i++){
3169 Picture *pic= h->short_ref[i];
3170 if(s->avctx->debug&FF_DEBUG_MMCO)
3171 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3172 if(pic->frame_num == frame_num) {
3181 * Remove a picture from the short term reference list by its index in
3182 * that list. This does no checking on the provided index; it is assumed
3183 * to be valid. Other list entries are shifted down.
3184 * @param i index into h->short_ref of picture to remove.
3186 static void remove_short_at_index(H264Context *h, int i){
3187 assert(i >= 0 && i < h->short_ref_count);
3188 h->short_ref[i]= NULL;
// compact the remaining entries down over the removed slot
3189 if (--h->short_ref_count)
3190 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3195 * @return the removed picture or NULL if an error occurs
/**
 * Unreference (per ref_mask) the short-term picture with the given
 * frame_num, removing it from the short-term list if it becomes entirely
 * unreferenced. Returns the picture found, or NULL (per header comment).
 */
3197 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3198 MpegEncContext * const s = &h->s;
3202 if(s->avctx->debug&FF_DEBUG_MMCO)
3203 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3205 pic = find_short(h, frame_num, &i);
// only drop the list entry once no field of the picture remains referenced
3207 if(unreference_pic(h, pic, ref_mask))
3208 remove_short_at_index(h, i);
3215 * Remove a picture from the long term reference list by its index in
3217 * @return the removed picture or NULL if an error occurs
3219 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3222 pic= h->long_ref[i];
// only clear the slot once no field of the picture remains referenced
3224 if(unreference_pic(h, pic, ref_mask)){
3225 assert(h->long_ref[i]->long_ref == 1);
3226 h->long_ref[i]->long_ref= 0;
3227 h->long_ref[i]= NULL;
3228 h->long_ref_count--;
3236 * print short term list
3238 static void print_short_term(H264Context *h) {
// debug-only dump of the short-term reference list (FF_DEBUG_MMCO)
3240 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3241 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3242 for(i=0; i<h->short_ref_count; i++){
3243 Picture *pic= h->short_ref[i];
3244 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3250 * print long term list
3252 static void print_long_term(H264Context *h) {
// debug-only dump of the 16-slot long-term reference array (FF_DEBUG_MMCO)
3254 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3255 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3256 for(i = 0; i < 16; i++){
3257 Picture *pic= h->long_ref[i];
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3266 * Executes the reference picture marking (memory management control operations).
/**
 * Apply the slice's memory management control operations (MMCOs) to the
 * short-/long-term reference lists, then handle the implicit sliding-window
 * marking and second-field bookkeeping.
 * NOTE(review): extract is incomplete (missing lines/braces, e.g. the
 * MMCO_LONG and sliding-window branches are partially visible); comments
 * describe only the visible statements.
 */
3268 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3269 MpegEncContext * const s = &h->s;
3271 int current_ref_assigned=0;
3274 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3275 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3277 for(i=0; i<mmco_count; i++){
3278 int structure, frame_num;
3279 if(s->avctx->debug&FF_DEBUG_MMCO)
3280 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// opcodes that reference a short-term picture: resolve it up front
3282 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3283 || mmco[i].opcode == MMCO_SHORT2LONG){
3284 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3285 pic = find_short(h, frame_num, &j);
3287 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3288 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3289 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3294 switch(mmco[i].opcode){
3295 case MMCO_SHORT2UNUSED:
3296 if(s->avctx->debug&FF_DEBUG_MMCO)
3297 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// unreference the other field only (structure ^ PICT_FRAME)
3298 remove_short(h, frame_num, structure ^ PICT_FRAME);
3300 case MMCO_SHORT2LONG:
// evict any different picture occupying the target long-term slot
3301 if (h->long_ref[mmco[i].long_arg] != pic)
3302 remove_long(h, mmco[i].long_arg, 0);
3304 remove_short_at_index(h, j);
3305 h->long_ref[ mmco[i].long_arg ]= pic;
3306 if (h->long_ref[ mmco[i].long_arg ]){
3307 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3308 h->long_ref_count++;
3311 case MMCO_LONG2UNUSED:
3312 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3313 pic = h->long_ref[j];
3315 remove_long(h, j, structure ^ PICT_FRAME);
3316 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3317 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3320 // Comment below left from previous code as it is an interesting note.
3321 /* First field in pair is in short term list or
3322 * at a different long term index.
3323 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3324 * Report the problem and keep the pair where it is,
3325 * and mark this field valid.
// (MMCO_LONG, partially visible) mark the current picture long-term
3328 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3329 remove_long(h, mmco[i].long_arg, 0);
3331 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3332 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3333 h->long_ref_count++;
3336 s->current_picture_ptr->reference |= s->picture_structure;
3337 current_ref_assigned=1;
3339 case MMCO_SET_MAX_LONG:
3340 assert(mmco[i].long_arg <= 16);
3341 // just remove the long term which index is greater than new max
3342 for(j = mmco[i].long_arg; j<16; j++){
3343 remove_long(h, j, 0);
// (MMCO_RESET, partially visible) drop every reference and reset POC/frame_num
3347 while(h->short_ref_count){
3348 remove_short(h, h->short_ref[0]->frame_num, 0);
3350 for(j = 0; j < 16; j++) {
3351 remove_long(h, j, 0);
3353 s->current_picture_ptr->poc=
3354 s->current_picture_ptr->field_poc[0]=
3355 s->current_picture_ptr->field_poc[1]=
3359 s->current_picture_ptr->frame_num= 0;
3365 if (!current_ref_assigned) {
3366 /* Second field of complementary field pair; the first field of
3367 * which is already referenced. If short referenced, it
3368 * should be first entry in short_ref. If not, it must exist
3369 * in long_ref; trying to put it on the short list here is an
3370 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3372 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3373 /* Just mark the second field valid */
3374 s->current_picture_ptr->reference = PICT_FRAME;
3375 } else if (s->current_picture_ptr->long_ref) {
3376 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3377 "assignment for second field "
3378 "in complementary field pair "
3379 "(first field is long term)\n");
3381 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3383 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
// insert the current picture at the head of the short-term list
3386 if(h->short_ref_count)
3387 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3389 h->short_ref[0]= s->current_picture_ptr;
3390 h->short_ref_count++;
3391 s->current_picture_ptr->reference |= s->picture_structure;
3395 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3397 /* We have too many reference frames, probably due to corrupted
3398 * stream. Need to discard one frame. Prevents overrun of the
3399 * short_ref and long_ref buffers.
3401 av_log(h->s.avctx, AV_LOG_ERROR,
3402 "number of reference frames exceeds max (probably "
3403 "corrupt input), discarding one\n");
3405 if (h->long_ref_count && !h->short_ref_count) {
3406 for (i = 0; i < 16; ++i)
3411 remove_long(h, i, 0);
// otherwise discard the oldest short-term reference
3413 pic = h->short_ref[h->short_ref_count - 1];
3414 remove_short(h, pic->frame_num, 0);
3418 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[], or
 * synthesize a sliding-window MMCO when the adaptive mode is not used.
 * NOTE(review): extract is incomplete; comments cover only visible code.
 */
3423 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3424 MpegEncContext * const s = &h->s;
3428 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag; -1 so 0 bit -> broken_link stays negative
3429 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag handling: mark IDR picture long-term at index 0
3431 h->mmco[0].opcode= MMCO_LONG;
3432 h->mmco[0].long_arg= 0;
3436 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3437 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3438 MMCOOpcode opcode= get_ue_golomb_31(gb);
3440 h->mmco[i].opcode= opcode;
3441 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num, wrapped to max_pic_num
3442 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3443 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3444 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3448 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3449 unsigned int long_arg= get_ue_golomb_31(gb);
// long index < 16, except LONG2UNUSED in field mode may address 32 field slots
3450 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3451 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3454 h->mmco[i].long_arg= long_arg;
3457 if(opcode > (unsigned)MMCO_LONG){
3458 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3461 if(opcode == MMCO_END)
3466 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// sliding window: if the DPB is full, synthesize an unref of the oldest
// short-term picture (skipped for the second field of a reference pair)
3468 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3469 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3470 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3471 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3473 if (FIELD_PICTURE) {
// field mode uses pic nums = 2*frame_num (+1); unref both fields
3474 h->mmco[0].short_pic_num *= 2;
3475 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3476 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) for the current picture using the
 * SPS poc_type (0: MSB/LSB wraparound, 1: expected-delta cycles,
 * 2: derived from frame_num), then store the per-field and frame POCs.
 * NOTE(review): extract is incomplete; comments cover only visible code.
 */
3486 static int init_poc(H264Context *h){
3487 MpegEncContext * const s = &h->s;
3488 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3490 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture -> advance the offset
3492 h->frame_num_offset= h->prev_frame_num_offset;
3493 if(h->frame_num < h->prev_frame_num)
3494 h->frame_num_offset += max_frame_num;
3496 if(h->sps.poc_type==0){
3497 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// detect LSB wraparound in either direction and adjust the MSB
3499 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3500 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3501 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3502 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3504 h->poc_msb = h->prev_poc_msb;
3505 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3507 field_poc[1] = h->poc_msb + h->poc_lsb;
3508 if(s->picture_structure == PICT_FRAME)
3509 field_poc[1] += h->delta_poc_bottom;
3510 }else if(h->sps.poc_type==1){
3511 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3514 if(h->sps.poc_cycle_length != 0)
3515 abs_frame_num = h->frame_num_offset + h->frame_num;
// non-reference pictures count one less in the cycle
3519 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3522 expected_delta_per_poc_cycle = 0;
3523 for(i=0; i < h->sps.poc_cycle_length; i++)
3524 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3526 if(abs_frame_num > 0){
3527 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3528 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3530 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3531 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3532 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3536 if(h->nal_ref_idc == 0)
3537 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3539 field_poc[0] = expectedpoc + h->delta_poc[0];
3540 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3542 if(s->picture_structure == PICT_FRAME)
3543 field_poc[1] += h->delta_poc[1];
// poc_type == 2: POC follows directly from frame counting
3545 int poc= 2*(h->frame_num_offset + h->frame_num);
// store only the field POC(s) this picture actually carries
3554 if(s->picture_structure != PICT_BOTTOM_FIELD)
3555 s->current_picture_ptr->field_poc[0]= field_poc[0];
3556 if(s->picture_structure != PICT_TOP_FIELD)
3557 s->current_picture_ptr->field_poc[1]= field_poc[1];
3558 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3565 * initialize scan tables
/**
 * Initialize the zigzag/field scan tables, permuting them with T() when the
 * DSP context uses a non-reference IDCT whose coefficient layout differs
 * from the C implementation; also set up the _q0 (transform-bypass) aliases.
 */
3567 static void init_scan_tables(H264Context *h){
3568 MpegEncContext * const s = &h->s;
// reference C IDCT: use the canonical tables unpermuted
3570 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3571 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3572 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3574 for(i=0; i<16; i++){
// 4x4 permutation: swap the two 2-bit halves of the scan index
3575 #define T(x) (x>>2) | ((x<<2) & 0xF)
3576 h->zigzag_scan[i] = T(zigzag_scan[i]);
3577 h-> field_scan[i] = T( field_scan[i]);
3581 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3582 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3583 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3584 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3585 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3587 for(i=0; i<64; i++){
// 8x8 permutation: swap the two 3-bit halves of the scan index
3588 #define T(x) (x>>3) | ((x&7)<<3)
3589 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3590 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3591 h->field_scan8x8[i] = T(field_scan8x8[i]);
3592 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// transform bypass always uses the unpermuted (qp==0) tables
3596 if(h->sps.transform_bypass){ //FIXME same ugly
3597 h->zigzag_scan_q0 = zigzag_scan;
3598 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3599 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3600 h->field_scan_q0 = field_scan;
3601 h->field_scan8x8_q0 = field_scan8x8;
3602 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3604 h->zigzag_scan_q0 = h->zigzag_scan;
3605 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3606 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3607 h->field_scan_q0 = h->field_scan;
3608 h->field_scan8x8_q0 = h->field_scan8x8;
3609 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3614 * Replicates H264 "master" context to thread contexts.
/* Replicate the per-frame decoding state from the master context into a
 * worker-thread context so slice decoding can proceed in parallel.
 * Copies picture pointers, POC/frame_num tracking, the reference lists,
 * and the dequant tables (shallow copies; buffers stay shared). */
3616 static void clone_slice(H264Context *dst, H264Context *src)
3618 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3619 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3620 dst->s.current_picture = src->s.current_picture;
3621 dst->s.linesize = src->s.linesize;
3622 dst->s.uvlinesize = src->s.uvlinesize;
3623 dst->s.first_field = src->s.first_field;
3625 dst->prev_poc_msb = src->prev_poc_msb;
3626 dst->prev_poc_lsb = src->prev_poc_lsb;
3627 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3628 dst->prev_frame_num = src->prev_frame_num;
3629 dst->short_ref_count = src->short_ref_count;
3631 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3632 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3633 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3634 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3636 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3637 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3641 * decodes a slice header.
3642 * This will also call MPV_common_init() and frame_start() as needed.
3644 * @param h h264context
3645 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3647 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Parse a slice header and set up all per-slice decoding state: slice type,
 * active PPS/SPS, dimensions, picture structure / field pairing, POC,
 * reference lists and reordering, weight tables, ref pic marking, CABAC
 * init, QP and deblocking parameters.
 * NOTE(review): this extract is missing many interior lines (branches,
 * braces, error returns); comments describe only visible code.
 */
3649 static int decode_slice_header(H264Context *h, H264Context *h0){
3650 MpegEncContext * const s = &h->s;
3651 MpegEncContext * const s0 = &h0->s;
3652 unsigned int first_mb_in_slice;
3653 unsigned int pps_id;
3654 int num_ref_idx_active_override_flag;
3655 unsigned int slice_type, tmp, i, j;
3656 int default_ref_list_done = 0;
3657 int last_pic_structure;
// non-reference slices may be dropped by the caller
3659 s->dropable= h->nal_ref_idc == 0;
// FAST flag: cheaper 2-tap qpel interpolation for non-reference slices
3661 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3662 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3663 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3665 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3666 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3669 first_mb_in_slice= get_ue_golomb(&s->gb);
3671 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3672 h0->current_slice = 0;
3673 if (!s0->first_field)
3674 s->current_picture_ptr= NULL;
3677 slice_type= get_ue_golomb_31(&s->gb);
3679 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type > 4 means "fixed for the whole picture" (value - 5)
3684 h->slice_type_fixed=1;
3686 h->slice_type_fixed=0;
3688 slice_type= golomb_to_pict_type[ slice_type ];
3689 if (slice_type == FF_I_TYPE
3690 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3691 default_ref_list_done = 1;
3693 h->slice_type= slice_type;
3694 h->slice_type_nos= slice_type & 3;
3696 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3697 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3698 av_log(h->s.avctx, AV_LOG_ERROR,
3699 "B picture before any references, skipping\n");
// activate the referenced PPS and its SPS
3703 pps_id= get_ue_golomb(&s->gb);
3704 if(pps_id>=MAX_PPS_COUNT){
3705 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3708 if(!h0->pps_buffers[pps_id]) {
3709 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3712 h->pps= *h0->pps_buffers[pps_id];
3714 if(!h0->sps_buffers[h->pps.sps_id]) {
3715 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3718 h->sps = *h0->sps_buffers[h->pps.sps_id];
// rebuild dequant tables only when the active PPS changed (master ctx only)
3720 if(h == h0 && h->dequant_coeff_pps != pps_id){
3721 h->dequant_coeff_pps = pps_id;
3722 init_dequant_tables(h);
3725 s->mb_width= h->sps.mb_width;
3726 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3728 h->b_stride= s->mb_width*4;
3729 h->b8_stride= s->mb_width*2;
// apply SPS cropping to derive the display dimensions
3731 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3732 if(h->sps.frame_mbs_only_flag)
3733 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3735 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3737 if (s->context_initialized
3738 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3740 return -1; // width / height changed during parallelized decoding
3742 flush_dpb(s->avctx);
3745 if (!s->context_initialized) {
3747 return -1; // we cant (re-)initialize context during parallel decoding
3748 if (MPV_common_init(s) < 0)
3752 init_scan_tables(h);
// allocate and initialize the per-thread slice contexts
3755 for(i = 1; i < s->avctx->thread_count; i++) {
3757 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3758 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3759 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3762 init_scan_tables(c);
3766 for(i = 0; i < s->avctx->thread_count; i++)
3767 if(context_init(h->thread_context[i]) < 0)
3770 s->avctx->width = s->width;
3771 s->avctx->height = s->height;
3772 s->avctx->sample_aspect_ratio= h->sps.sar;
3773 if(!s->avctx->sample_aspect_ratio.den)
3774 s->avctx->sample_aspect_ratio.den = 1;
3776 if(h->sps.timing_info_present_flag){
3777 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// work around old x264 builds that coded the timebase without the *2
3778 if(h->x264_build > 0 && h->x264_build < 44)
3779 s->avctx->time_base.den *= 2;
3780 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3781 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3785 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3788 h->mb_aff_frame = 0;
3789 last_pic_structure = s0->picture_structure;
3790 if(h->sps.frame_mbs_only_flag){
3791 s->picture_structure= PICT_FRAME;
3793 if(get_bits1(&s->gb)) { //field_pic_flag
3794 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3796 s->picture_structure= PICT_FRAME;
3797 h->mb_aff_frame = h->sps.mb_aff;
3800 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3802 if(h0->current_slice == 0){
// conceal frame_num gaps by synthesizing the skipped reference frames
3803 while(h->frame_num != h->prev_frame_num &&
3804 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3805 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3807 h->prev_frame_num++;
3808 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3809 s->current_picture_ptr->frame_num= h->prev_frame_num;
3810 execute_ref_pic_marking(h, NULL, 0);
3813 /* See if we have a decoded first field looking for a pair... */
3814 if (s0->first_field) {
3815 assert(s0->current_picture_ptr);
3816 assert(s0->current_picture_ptr->data[0]);
3817 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3819 /* figure out if we have a complementary field pair */
3820 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3822 * Previous field is unmatched. Don't display it, but let it
3823 * remain for reference if marked as such.
3825 s0->current_picture_ptr = NULL;
3826 s0->first_field = FIELD_PICTURE;
3829 if (h->nal_ref_idc &&
3830 s0->current_picture_ptr->reference &&
3831 s0->current_picture_ptr->frame_num != h->frame_num) {
3833 * This and previous field were reference, but had
3834 * different frame_nums. Consider this field first in
3835 * pair. Throw away previous field except for reference
3838 s0->first_field = 1;
3839 s0->current_picture_ptr = NULL;
3842 /* Second field in complementary pair */
3843 s0->first_field = 0;
3848 /* Frame or first field in a potentially complementary pair */
3849 assert(!s0->current_picture_ptr);
3850 s0->first_field = FIELD_PICTURE;
3853 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3854 s0->first_field = 0;
3861 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3863 assert(s->mb_num == s->mb_width * s->mb_height);
3864 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3865 first_mb_in_slice >= s->mb_num){
3866 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3869 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3870 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3871 if (s->picture_structure == PICT_BOTTOM_FIELD)
3872 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3873 assert(s->mb_y < s->mb_height);
// pic num space is doubled in field mode (2*frame_num + 1)
3875 if(s->picture_structure==PICT_FRAME){
3876 h->curr_pic_num= h->frame_num;
3877 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3879 h->curr_pic_num= 2*h->frame_num + 1;
3880 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3883 if(h->nal_unit_type == NAL_IDR_SLICE){
3884 get_ue_golomb(&s->gb); /* idr_pic_id */
3887 if(h->sps.poc_type==0){
3888 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3890 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3891 h->delta_poc_bottom= get_se_golomb(&s->gb);
3895 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3896 h->delta_poc[0]= get_se_golomb(&s->gb);
3898 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3899 h->delta_poc[1]= get_se_golomb(&s->gb);
3904 if(h->pps.redundant_pic_cnt_present){
3905 h->redundant_pic_count= get_ue_golomb(&s->gb);
3908 //set defaults, might be overridden a few lines later
3909 h->ref_count[0]= h->pps.ref_count[0];
3910 h->ref_count[1]= h->pps.ref_count[1];
3912 if(h->slice_type_nos != FF_I_TYPE){
3913 if(h->slice_type_nos == FF_B_TYPE){
3914 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3916 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3918 if(num_ref_idx_active_override_flag){
3919 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3920 if(h->slice_type_nos==FF_B_TYPE)
3921 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3923 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3924 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3925 h->ref_count[0]= h->ref_count[1]= 1;
3929 if(h->slice_type_nos == FF_B_TYPE)
3936 if(!default_ref_list_done){
3937 fill_default_ref_list(h);
3940 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// expose ref_list heads as last/next picture for the shared MPV code
3943 if(h->slice_type_nos!=FF_I_TYPE){
3944 s->last_picture_ptr= &h->ref_list[0][0];
3945 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3947 if(h->slice_type_nos==FF_B_TYPE){
3948 s->next_picture_ptr= &h->ref_list[1][0];
3949 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3952 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3953 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3954 pred_weight_table(h);
3955 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3956 implicit_weight_table(h);
3959 for (i = 0; i < 2; i++) {
3960 h->luma_weight_flag[i] = 0;
3961 h->chroma_weight_flag[i] = 0;
3966 decode_ref_pic_marking(h0, &s->gb);
3969 fill_mbaff_ref_list(h);
3971 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3972 direct_dist_scale_factor(h);
3973 direct_ref_list_init(h);
3975 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3976 tmp = get_ue_golomb_31(&s->gb);
3978 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3981 h->cabac_init_idc= tmp;
3984 h->last_qscale_diff = 0;
3985 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3987 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3991 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3992 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3993 //FIXME qscale / qp ... stuff
3994 if(h->slice_type == FF_SP_TYPE){
3995 get_bits1(&s->gb); /* sp_for_switch_flag */
3997 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3998 get_se_golomb(&s->gb); /* slice_qs_delta */
4001 h->deblocking_filter = 1;
4002 h->slice_alpha_c0_offset = 0;
4003 h->slice_beta_offset = 0;
4004 if( h->pps.deblocking_filter_parameters_present ) {
4005 tmp= get_ue_golomb_31(&s->gb);
4007 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4010 h->deblocking_filter= tmp;
// bitstream idc 0/1 have inverted meaning vs. our internal flag
4011 if(h->deblocking_filter < 2)
4012 h->deblocking_filter^= 1; // 1<->0
4014 if( h->deblocking_filter ) {
4015 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4016 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// honor the user's skip_loop_filter discard level
4020 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4021 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4022 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4023 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4024 h->deblocking_filter= 0;
4026 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4027 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4028 /* Cheat slightly for speed:
4029 Do not bother to deblock across slices. */
4030 h->deblocking_filter = 2;
4032 h0->max_contexts = 1;
4033 if(!h0->single_decode_warning) {
4034 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4035 h0->single_decode_warning = 1;
4038 return 1; // deblocking switched inside frame
// NOTE(review): unfinished FMO code — '?' placeholder is original;
// presumably this line sits inside a disabled (#if 0) region upstream
4043 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4044 slice_group_change_cycle= get_bits(&s->gb, ?);
4047 h0->last_slice_type = slice_type;
4048 h->slice_num = ++h0->current_slice;
4049 if(h->slice_num >= MAX_SLICES){
4050 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
// build the per-slice ref->frame mapping used by the loop filter
4054 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4058 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4059 +(h->ref_list[j][i].reference&3);
4062 for(i=16; i<48; i++)
4063 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4064 +(h->ref_list[j][i].reference&3);
4067 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4068 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4070 s->avctx->refs= h->sps.ref_frame_count;
4072 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4073 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4075 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4077 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4078 pps_id, h->frame_num,
4079 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4080 h->ref_count[0], h->ref_count[1],
4082 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4084 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4085 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/*
 * Reads a CAVLC level_prefix: a unary code, i.e. the number of leading
 * zero bits before the next 1 bit in the bitstream.
 * NOTE(review): this dump is missing lines (local declarations and the
 * final `return log-1;` are not visible) — the visible text is not the
 * complete function.
 */
4095 static inline int get_level_prefix(GetBitContext *gb){
4099 OPEN_READER(re, gb);
4100 UPDATE_CACHE(re, gb);
4101 buf=GET_CACHE(re, gb);
 /* log = position (from MSB) of the first set bit, i.e. prefix length + 1 */
4103 log= 32 - av_log2(buf);
4105 print_bin(buf>>(32-log), log);
4106 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
 /* consume the prefix bits (including the terminating 1) and store reader state */
4109 LAST_SKIP_BITS(re, gb, log);
4110 CLOSE_READER(re, gb);
/*
 * Returns nonzero if the 8x8 DCT may be used for the current macroblock:
 * no sub-partition smaller than 8x8 may be present in sub_mb_type.
 * When direct_8x8_inference_flag is set, DIRECT sub-blocks are allowed
 * as well (they are inferred at 8x8 granularity); otherwise they also
 * forbid the 8x8 transform.
 * NOTE(review): the `else` line between the two returns is elided in
 * this dump.
 */
4115 static inline int get_dct8x8_allowed(H264Context *h){
4116 if(h->sps.direct_8x8_inference_flag)
4117 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4119 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4123 * decodes a residual block.
4124 * @param n block index
4125 * @param scantable scantable
4126 * @param max_coeff number of coefficients in the block
4127 * @return <0 if an error occurred
/*
 * CAVLC residual decoding (H.264 spec 9.2): parses coeff_token,
 * trailing-one signs, level codes, total_zeros and run_before, then
 * scatters the reconstructed levels into `block` via `scantable`,
 * optionally dequantizing with `qmul`.
 * NOTE(review): many lines (else-branches, error returns, closing
 * braces) are elided from this dump; comments describe visible logic only.
 */
4129 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4130 MpegEncContext * const s = &h->s;
4131 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4133 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4135 //FIXME put trailing_onex into the context
 /* coeff_token: packs total_coeff (>>2) and trailing_ones (&3).
  * The VLC table used for luma depends on the predicted nonzero count. */
4137 if(n == CHROMA_DC_BLOCK_INDEX){
4138 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4139 total_coeff= coeff_token>>2;
4141 if(n == LUMA_DC_BLOCK_INDEX){
4142 total_coeff= pred_non_zero_count(h, 0);
4143 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4144 total_coeff= coeff_token>>2;
4146 total_coeff= pred_non_zero_count(h, n);
4147 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4148 total_coeff= coeff_token>>2;
4149 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4153 //FIXME set last_non_zero?
 /* sanity check against corrupt bitstreams */
4157 if(total_coeff > (unsigned)max_coeff) {
4158 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4162 trailing_ones= coeff_token&3;
4163 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4164 assert(total_coeff<=16);
 /* up to three trailing ±1 coefficients: one sign bit each, decoded
  * branch-free from a 3-bit peek */
4166 i = show_bits(gb, 3);
4167 skip_bits(gb, trailing_ones);
4168 level[0] = 1-((i&4)>>1);
4169 level[1] = 1-((i&2) );
4170 level[2] = 1-((i&1)<<1);
4172 if(trailing_ones<total_coeff) {
 /* first non-trailing level: suffix_length starts at 0, or 1 when
  * total_coeff > 10 with fewer than 3 trailing ones (spec 9.2.2.1) */
4174 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4175 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4176 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4178 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
 /* values >= 100 in the LUT mean "escape": decode the long form manually */
4179 if(level_code >= 100){
4180 prefix= level_code - 100;
4181 if(prefix == LEVEL_TAB_BITS)
4182 prefix += get_level_prefix(gb);
4184 //first coefficient has suffix_length equal to 0 or 1
4185 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4187 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4189 level_code= (prefix<<suffix_length); //part
4190 }else if(prefix==14){
4192 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4194 level_code= prefix + get_bits(gb, 4); //part
4196 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4197 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4199 level_code += (1<<(prefix-3))-4096;
4202 if(trailing_ones < 3) level_code += 2;
 /* map unsigned level_code to a signed level (zig-zag sign mapping) */
4205 mask= -(level_code&1);
4206 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4208 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4211 if(level_code + 3U > 6U)
4213 level[trailing_ones]= level_code;
4216 //remaining coefficients have suffix_length > 0
4217 for(i=trailing_ones+1;i<total_coeff;i++) {
 /* suffix_length grows once |level| exceeds these thresholds */
4218 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4219 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4220 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4222 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4223 if(level_code >= 100){
4224 prefix= level_code - 100;
4225 if(prefix == LEVEL_TAB_BITS){
4226 prefix += get_level_prefix(gb);
4229 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4231 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4233 level_code += (1<<(prefix-3))-4096;
4235 mask= -(level_code&1);
4236 level_code= (((2+level_code)>>1) ^ mask) - mask;
4238 level[i]= level_code;
4240 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
 /* total_zeros is only coded when the block is not already full */
4245 if(total_coeff == max_coeff)
4248 if(n == CHROMA_DC_BLOCK_INDEX)
4249 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4251 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
 /* scatter levels from last to first scan position; two near-identical
  * loops: plain store (qmul==NULL path) vs. dequantizing store */
4254 coeff_num = zeros_left + total_coeff - 1;
4255 j = scantable[coeff_num];
4257 block[j] = level[0];
4258 for(i=1;i<total_coeff;i++) {
4261 else if(zeros_left < 7){
4262 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4264 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4266 zeros_left -= run_before;
4267 coeff_num -= 1 + run_before;
4268 j= scantable[ coeff_num ];
 /* dequantizing variant: (level * qmul + 32) >> 6 */
4273 block[j] = (level[0] * qmul[j] + 32)>>6;
4274 for(i=1;i<total_coeff;i++) {
4277 else if(zeros_left < 7){
4278 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4280 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4282 zeros_left -= run_before;
4283 coeff_num -= 1 + run_before;
4284 j= scantable[ coeff_num ];
4286 block[j]= (level[i] * qmul[j] + 32)>>6;
 /* a negative zeros_left indicates bitstream corruption */
4291 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/*
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair
 * from the left neighbour if it is in the same slice, otherwise from the
 * top neighbour.
 * NOTE(review): the final `: 0` arm of the conditional (used when
 * neither neighbour is in this slice) is elided in this dump.
 */
4298 static void predict_field_decoding_flag(H264Context *h){
4299 MpegEncContext * const s = &h->s;
4300 const int mb_xy= h->mb_xy;
4301 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4302 ? s->current_picture.mb_type[mb_xy-1]
4303 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4304 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4306 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4310 * decodes a P_SKIP or B_SKIP macroblock
/*
 * Decodes a P_SKIP or B_SKIP macroblock: no residual, motion inferred
 * (direct prediction for B slices, P-skip motion prediction for P).
 * NOTE(review): several lines (mb_type initialization, else branches,
 * closing braces) are elided in this dump.
 */
4312 static void decode_mb_skip(H264Context *h){
4313 MpegEncContext * const s = &h->s;
4314 const int mb_xy= h->mb_xy;
 /* skipped MB carries no coefficients */
4317 memset(h->non_zero_count[mb_xy], 0, 16);
4318 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4321 mb_type|= MB_TYPE_INTERLACED;
4323 if( h->slice_type_nos == FF_B_TYPE )
4325 // just for fill_caches. pred_direct_motion will set the real mb_type
4326 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4328 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4329 pred_direct_motion(h, &mb_type);
4330 mb_type|= MB_TYPE_SKIP;
 /* P_SKIP path: predict the skip motion vector and fill ref 0 / mv caches */
4335 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4337 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4338 pred_pskip_motion(h, &mx, &my);
4339 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4340 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
 /* commit per-MB state to the picture-level tables */
4343 write_back_motion(h, mb_type);
4344 s->current_picture.mb_type[mb_xy]= mb_type;
4345 s->current_picture.qscale_table[mb_xy]= s->qscale;
4346 h->slice_table[ mb_xy ]= h->slice_num;
4347 h->prev_mb_skipped= 1;
4351 * decodes a macroblock
4352 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/*
 * Decodes one macroblock from a CAVLC-coded slice: skip runs, mb_type,
 * intra prediction modes or inter motion/ref info, coded block pattern,
 * delta-QP and all residual blocks.
 * @return 0 on success, visible error paths return negative
 * NOTE(review): this dump elides many lines (else branches, returns,
 * closing braces); comments annotate only the visible logic.
 */
4354 static int decode_mb_cavlc(H264Context *h){
4355 MpegEncContext * const s = &h->s;
4357 int partition_count;
4358 unsigned int mb_type, cbp;
4359 int dct8x8_allowed= h->pps.transform_8x8_mode;
4361 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4363 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4364 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
 /* non-I slices start with mb_skip_run (ue(v)); handle skipped MBs first */
4366 if(h->slice_type_nos != FF_I_TYPE){
4367 if(s->mb_skip_run==-1)
4368 s->mb_skip_run= get_ue_golomb(&s->gb);
4370 if (s->mb_skip_run--) {
 /* MBAFF: field flag is read on the top MB of a pair, or predicted
  * from neighbours when the whole pair is skipped */
4371 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4372 if(s->mb_skip_run==0)
4373 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4375 predict_field_decoding_flag(h);
4382 if( (s->mb_y&1) == 0 )
4383 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4386 h->prev_mb_skipped= 0;
 /* mb_type: meaning depends on slice type; large values fall through
  * to the intra tables (decode_intra_mb) */
4388 mb_type= get_ue_golomb(&s->gb);
4389 if(h->slice_type_nos == FF_B_TYPE){
4391 partition_count= b_mb_type_info[mb_type].partition_count;
4392 mb_type= b_mb_type_info[mb_type].type;
4395 goto decode_intra_mb;
4397 }else if(h->slice_type_nos == FF_P_TYPE){
4399 partition_count= p_mb_type_info[mb_type].partition_count;
4400 mb_type= p_mb_type_info[mb_type].type;
4403 goto decode_intra_mb;
4406 assert(h->slice_type_nos == FF_I_TYPE);
4407 if(h->slice_type == FF_SI_TYPE && mb_type)
4411 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4415 cbp= i_mb_type_info[mb_type].cbp;
4416 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4417 mb_type= i_mb_type_info[mb_type].type;
4421 mb_type |= MB_TYPE_INTERLACED;
4423 h->slice_table[ mb_xy ]= h->slice_num;
 /* IPCM macroblock: raw samples, byte-aligned in the bitstream */
4425 if(IS_INTRA_PCM(mb_type)){
4428 // We assume these blocks are very rare so we do not optimize it.
4429 align_get_bits(&s->gb);
4431 // The pixels are stored in the same order as levels in h->mb array.
4432 for(x=0; x < (CHROMA ? 384 : 256); x++){
4433 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4436 // In deblocking, the quantizer is 0
4437 s->current_picture.qscale_table[mb_xy]= 0;
4438 // All coeffs are present
4439 memset(h->non_zero_count[mb_xy], 16, 16);
4441 s->current_picture.mb_type[mb_xy]= mb_type;
 /* MBAFF field MB: ref counts are doubled while decoding (restored below) */
4446 h->ref_count[0] <<= 1;
4447 h->ref_count[1] <<= 1;
4450 fill_caches(h, mb_type, 0);
 /* ---- intra macroblocks: read prediction modes ---- */
4453 if(IS_INTRA(mb_type)){
4455 // init_top_left_availability(h);
4456 if(IS_INTRA4x4(mb_type)){
4459 if(dct8x8_allowed && get_bits1(&s->gb)){
4460 mb_type |= MB_TYPE_8x8DCT;
4464 // fill_intra4x4_pred_table(h);
 /* each 4x4 (or 8x8) mode: 1-bit "use predicted", else 3-bit remainder */
4465 for(i=0; i<16; i+=di){
4466 int mode= pred_intra_mode(h, i);
4468 if(!get_bits1(&s->gb)){
4469 const int rem_mode= get_bits(&s->gb, 3);
4470 mode = rem_mode + (rem_mode >= mode);
4474 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4476 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4478 write_back_intra_pred_mode(h);
4479 if( check_intra4x4_pred_mode(h) < 0)
4482 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4483 if(h->intra16x16_pred_mode < 0)
4487 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4490 h->chroma_pred_mode= pred_mode;
 /* ---- 8x8 partitioned inter MB: sub_mb_types, refs, MVs ---- */
4492 }else if(partition_count==4){
4493 int i, j, sub_partition_count[4], list, ref[2][4];
4495 if(h->slice_type_nos == FF_B_TYPE){
4497 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4498 if(h->sub_mb_type[i] >=13){
4499 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4502 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4503 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4505 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4506 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4507 pred_direct_motion(h, &mb_type);
4508 h->ref_cache[0][scan8[4]] =
4509 h->ref_cache[1][scan8[4]] =
4510 h->ref_cache[0][scan8[12]] =
4511 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4514 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4516 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4517 if(h->sub_mb_type[i] >=4){
4518 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4521 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4522 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
 /* reference indices per list and 8x8 block (1 bit when 2 refs, ue(v) otherwise) */
4526 for(list=0; list<h->list_count; list++){
4527 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4529 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4530 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4534 }else if(ref_count == 2){
4535 tmp= get_bits1(&s->gb)^1;
4537 tmp= get_ue_golomb_31(&s->gb);
4539 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4552 dct8x8_allowed = get_dct8x8_allowed(h);
 /* motion vector differences per sub-partition */
4554 for(list=0; list<h->list_count; list++){
4556 if(IS_DIRECT(h->sub_mb_type[i])) {
4557 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4560 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4561 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4563 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4564 const int sub_mb_type= h->sub_mb_type[i];
4565 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4566 for(j=0; j<sub_partition_count[i]; j++){
4568 const int index= 4*i + block_width*j;
4569 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4570 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4571 mx += get_se_golomb(&s->gb);
4572 my += get_se_golomb(&s->gb);
4573 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 /* replicate the MV over the covered 4x4 cache cells per sub-block shape */
4575 if(IS_SUB_8X8(sub_mb_type)){
4577 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4579 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4580 }else if(IS_SUB_8X4(sub_mb_type)){
4581 mv_cache[ 1 ][0]= mx;
4582 mv_cache[ 1 ][1]= my;
4583 }else if(IS_SUB_4X8(sub_mb_type)){
4584 mv_cache[ 8 ][0]= mx;
4585 mv_cache[ 8 ][1]= my;
4587 mv_cache[ 0 ][0]= mx;
4588 mv_cache[ 0 ][1]= my;
4591 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4597 }else if(IS_DIRECT(mb_type)){
4598 pred_direct_motion(h, &mb_type);
4599 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
 /* ---- 16x16 / 16x8 / 8x16 inter MB: refs then MVDs per partition ---- */
4601 int list, mx, my, i;
4602 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4603 if(IS_16X16(mb_type)){
4604 for(list=0; list<h->list_count; list++){
4606 if(IS_DIR(mb_type, 0, list)){
4607 if(h->ref_count[list]==1){
4609 }else if(h->ref_count[list]==2){
4610 val= get_bits1(&s->gb)^1;
4612 val= get_ue_golomb_31(&s->gb);
4613 if(val >= h->ref_count[list]){
4614 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4619 val= LIST_NOT_USED&0xFF;
4620 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4622 for(list=0; list<h->list_count; list++){
4624 if(IS_DIR(mb_type, 0, list)){
4625 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4626 mx += get_se_golomb(&s->gb);
4627 my += get_se_golomb(&s->gb);
4628 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4630 val= pack16to32(mx,my);
4633 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4636 else if(IS_16X8(mb_type)){
4637 for(list=0; list<h->list_count; list++){
4640 if(IS_DIR(mb_type, i, list)){
4641 if(h->ref_count[list] == 1){
4643 }else if(h->ref_count[list] == 2){
4644 val= get_bits1(&s->gb)^1;
4646 val= get_ue_golomb_31(&s->gb);
4647 if(val >= h->ref_count[list]){
4648 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4653 val= LIST_NOT_USED&0xFF;
4654 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4657 for(list=0; list<h->list_count; list++){
4660 if(IS_DIR(mb_type, i, list)){
4661 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4662 mx += get_se_golomb(&s->gb);
4663 my += get_se_golomb(&s->gb);
4664 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4666 val= pack16to32(mx,my);
4669 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4673 assert(IS_8X16(mb_type));
4674 for(list=0; list<h->list_count; list++){
4677 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4678 if(h->ref_count[list]==1){
4680 }else if(h->ref_count[list]==2){
4681 val= get_bits1(&s->gb)^1;
4683 val= get_ue_golomb_31(&s->gb);
4684 if(val >= h->ref_count[list]){
4685 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4690 val= LIST_NOT_USED&0xFF;
4691 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4694 for(list=0; list<h->list_count; list++){
4697 if(IS_DIR(mb_type, i, list)){
4698 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4699 mx += get_se_golomb(&s->gb);
4700 my += get_se_golomb(&s->gb);
4701 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4703 val= pack16to32(mx,my);
4706 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4712 if(IS_INTER(mb_type))
4713 write_back_motion(h, mb_type);
 /* coded_block_pattern (not present for Intra_16x16, which encodes cbp in mb_type) */
4715 if(!IS_INTRA16x16(mb_type)){
4716 cbp= get_ue_golomb(&s->gb);
4718 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4723 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4724 else cbp= golomb_to_inter_cbp [cbp];
4726 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4727 else cbp= golomb_to_inter_cbp_gray[cbp];
 /* transform_size_8x8_flag for inter MBs with luma residual */
4732 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4733 if(get_bits1(&s->gb)){
4734 mb_type |= MB_TYPE_8x8DCT;
4735 h->cbp_table[mb_xy]= cbp;
4738 s->current_picture.mb_type[mb_xy]= mb_type;
 /* ---- residuals: delta QP then luma/chroma coefficient blocks ---- */
4740 if(cbp || IS_INTRA16x16(mb_type)){
4741 int i8x8, i4x4, chroma_idx;
4743 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4744 const uint8_t *scan, *scan8x8, *dc_scan;
4746 // fill_non_zero_count_cache(h);
4748 if(IS_INTERLACED(mb_type)){
4749 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4750 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4751 dc_scan= luma_dc_field_scan;
4753 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4754 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4755 dc_scan= luma_dc_zigzag_scan;
4758 dquant= get_se_golomb(&s->gb);
4760 if( dquant > 25 || dquant < -26 ){
4761 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
 /* QP wraps modulo 52 per the spec */
4765 s->qscale += dquant;
4766 if(((unsigned)s->qscale) > 51){
4767 if(s->qscale<0) s->qscale+= 52;
4768 else s->qscale-= 52;
4771 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4772 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4773 if(IS_INTRA16x16(mb_type)){
4774 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4775 return -1; //FIXME continue if partitioned and other return -1 too
4778 assert((cbp&15) == 0 || (cbp&15) == 15);
4781 for(i8x8=0; i8x8<4; i8x8++){
4782 for(i4x4=0; i4x4<4; i4x4++){
4783 const int index= i4x4 + 4*i8x8;
 /* AC blocks of Intra_16x16: skip the DC coeff, hence scan+1 / 15 coeffs */
4784 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4790 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4793 for(i8x8=0; i8x8<4; i8x8++){
4794 if(cbp & (1<<i8x8)){
4795 if(IS_8x8DCT(mb_type)){
4796 DCTELEM *buf = &h->mb[64*i8x8];
4798 for(i4x4=0; i4x4<4; i4x4++){
4799 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4800 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4803 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4804 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4806 for(i4x4=0; i4x4<4; i4x4++){
4807 const int index= i4x4 + 4*i8x8;
4809 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4815 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4816 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
 /* chroma: 2x2 DC blocks (undequantized here), then AC blocks */
4822 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4823 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4829 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4830 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4831 for(i4x4=0; i4x4<4; i4x4++){
4832 const int index= 16 + 4*chroma_idx + i4x4;
4833 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4839 uint8_t * const nnz= &h->non_zero_count_cache[0];
4840 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4841 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4844 uint8_t * const nnz= &h->non_zero_count_cache[0];
4845 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4846 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4847 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4849 s->current_picture.qscale_table[mb_xy]= s->qscale;
4850 write_back_non_zero_count(h);
 /* restore ref counts doubled above for MBAFF field decoding */
4853 h->ref_count[0] >>= 1;
4854 h->ref_count[1] >>= 1;
/*
 * Decodes mb_field_decoding_flag with CABAC. The context (0..2) counts
 * how many of the left/top neighbouring MB pairs in the same slice are
 * field-coded (ctx increments are on elided lines in this dump).
 */
4860 static int decode_cabac_field_decoding_flag(H264Context *h) {
4861 MpegEncContext * const s = &h->s;
4862 const int mb_x = s->mb_x;
 /* address the top MB of the current pair */
4863 const int mb_y = s->mb_y & ~1;
4864 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4865 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4867 unsigned int ctx = 0;
4869 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4872 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4876 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/*
 * Decodes an intra mb_type with CABAC.
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 encoding the
 *         Intra_16x16 variant (pred mode, chroma cbp, luma cbp)
 * `intra_slice` selects the context layout used in I slices.
 */
4879 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4880 uint8_t *state= &h->cabac_state[ctx_base];
4884 MpegEncContext * const s = &h->s;
4885 const int mba_xy = h->left_mb_xy[0];
4886 const int mbb_xy = h->top_mb_xy;
 /* ctx = number of non-I4x4 neighbours in the same slice (increments elided) */
4888 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4890 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4892 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4893 return 0; /* I4x4 */
4896 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4897 return 0; /* I4x4 */
4900 if( get_cabac_terminate( &h->cabac ) )
4901 return 25; /* PCM */
 /* Intra_16x16: accumulate cbp_luma (+12), chroma cbp (+4/+8) and pred mode */
4903 mb_type = 1; /* I16x16 */
4904 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4905 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4906 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4907 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4908 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/*
 * Decodes mb_type for a B slice with CABAC.
 * @return 0 for B_Direct_16x16, small values for B_L0/L1_16x16, 22 for
 *         B_8x8, >=23 for intra types (delegated to
 *         decode_cabac_intra_mb_type), otherwise a bit-pattern-derived
 *         index into the B mb_type table.
 */
4912 static int decode_cabac_mb_type_b( H264Context *h ) {
4913 MpegEncContext * const s = &h->s;
4915 const int mba_xy = h->left_mb_xy[0];
4916 const int mbb_xy = h->top_mb_xy;
4919 assert(h->slice_type_nos == FF_B_TYPE);
 /* ctx = number of non-direct neighbours in this slice (increments elided) */
4921 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4923 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4926 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4927 return 0; /* B_Direct_16x16 */
4929 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4930 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
 /* 4-bit suffix selects among the remaining bi-predictive / split types */
4933 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4934 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4935 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4936 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4938 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4939 else if( bits == 13 ) {
4940 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4941 } else if( bits == 14 )
4942 return 11; /* B_L1_L0_8x16 */
4943 else if( bits == 15 )
4944 return 22; /* B_8x8 */
4946 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4947 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
/*
 * Decodes mb_skip_flag with CABAC. The context depends on whether the
 * left/top neighbours (adjusted for MBAFF pair geometry) are skipped;
 * B slices use a separate context group (offset added on an elided line).
 */
4950 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4951 MpegEncContext * const s = &h->s;
4955 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4956 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
 /* pick the neighbour MB of the pair that matches the current field parity */
4959 && h->slice_table[mba_xy] == h->slice_num
4960 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4961 mba_xy += s->mb_stride;
4963 mbb_xy = mb_xy - s->mb_stride;
4965 && h->slice_table[mbb_xy] == h->slice_num
4966 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4967 mbb_xy -= s->mb_stride;
4969 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4971 int mb_xy = h->mb_xy;
4973 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
 /* ctx increments for each non-skipped same-slice neighbour (elided) */
4976 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4978 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4981 if( h->slice_type_nos == FF_B_TYPE )
4983 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/*
 * Decodes one intra 4x4 prediction mode with CABAC: a 1-bit
 * "use most-probable mode" flag, else a 3-bit remainder that is mapped
 * around the predicted mode (spec 8.3.1.1 rem_intra4x4_pred_mode).
 */
4986 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4989 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
 /* 3-bit remainder, LSB first */
4992 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4993 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4994 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
 /* skip over the predicted mode so all 8 remainder values stay usable */
4996 if( mode >= pred_mode )
/*
 * Decodes intra_chroma_pred_mode with CABAC: first bin uses a context
 * based on whether each same-slice neighbour has a nonzero chroma mode,
 * remaining bins form a truncated unary code (returns visible on
 * elided lines).
 */
5002 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5003 const int mba_xy = h->left_mb_xy[0];
5004 const int mbb_xy = h->top_mb_xy;
5008 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5009 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5012 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5015 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5018 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5020 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/*
 * Decodes the 4-bit luma coded_block_pattern with CABAC, one bin per
 * 8x8 block. Each bin's context is derived from whether the 8x8
 * neighbours (left/top, falling back to the neighbouring MB's cbp, -1
 * meaning unavailable) are coded.
 */
5026 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5027 int cbp_b, cbp_a, ctx, cbp = 0;
 /* -1 (all bits set) when the neighbour is outside this slice */
5029 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5030 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5032 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5033 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5034 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5035 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5036 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5037 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5038 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5039 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/*
 * Decodes the chroma coded_block_pattern (0: none, 1: DC only, 2: DC+AC)
 * with CABAC; contexts depend on the neighbours' chroma cbp values.
 */
5042 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
 /* neighbour chroma cbp lives in bits 4..5 of the stored cbp */
5046 cbp_a = (h->left_cbp>>4)&0x03;
5047 cbp_b = (h-> top_cbp>>4)&0x03;
5050 if( cbp_a > 0 ) ctx++;
5051 if( cbp_b > 0 ) ctx += 2;
5052 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
 /* second bin distinguishes DC-only (1) from DC+AC (2) */
5056 if( cbp_a == 2 ) ctx++;
5057 if( cbp_b == 2 ) ctx += 2;
5058 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/*
 * Decodes mb_qp_delta with CABAC: a unary code whose first context
 * depends on whether the previous MB had a nonzero delta; the unsigned
 * value is then folded into the signed delta (alternating mapping).
 */
5060 static int decode_cabac_mb_dqp( H264Context *h) {
5061 int ctx= h->last_qscale_diff != 0;
5064 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5067 if(val > 102) //prevent infinite loop
 /* even val -> positive delta, odd -> negative (selection elided) */
5072 return (val + 1)>>1 ;
5074 return -((val + 1)>>1);
/*
 * Decodes a P-slice sub_mb_type (8x8, 8x4, 4x8 or 4x4) with CABAC from
 * up to three bins; the return statements for each leaf are on elided
 * lines in this dump.
 */
5076 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5077 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5079 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5081 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/*
 * Decodes a B-slice sub_mb_type index (0..12) with CABAC: 0 is
 * B_Direct_8x8, 1-2 are single-list 8x8, 11-12 are 4x4 types, and the
 * remaining values are built from additional bins into `type`.
 */
5085 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5087 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5088 return 0; /* B_Direct_8x8 */
5089 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5090 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5092 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5093 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5094 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5097 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5098 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/*
 * Decodes transform_size_8x8_flag with CABAC; the context is the number
 * of neighbouring MBs already using the 8x8 transform
 * (h->neighbor_transform_size, maintained elsewhere).
 */
5102 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5103 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/*
 * Decodes ref_idx for block n of `list` with CABAC: context from the
 * left/top cached ref indices (B-slice direct blocks are treated as
 * ref 0), then a unary code; capped at 32 to avoid unbounded loops on
 * broken streams.
 */
5106 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5107 int refa = h->ref_cache[list][scan8[n] - 1];
5108 int refb = h->ref_cache[list][scan8[n] - 8];
5112 if( h->slice_type_nos == FF_B_TYPE) {
 /* direct-predicted neighbours do not contribute to the context */
5113 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5115 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5124 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5127 if(ref >= 32 /*h->ref_list[list]*/){
/*
 * Decodes one motion vector difference component (l: 0=x, 1=y) with
 * CABAC: context from the summed |mvd| of the left/top neighbours,
 * unary prefix up to 9, then an Exp-Golomb-style bypass suffix, then
 * a bypass-coded sign.
 */
5134 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5135 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5136 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5137 int ctxbase = (l == 0) ? 40 : 47;
5139 int ctx = (amvd>2) + (amvd>32);
5141 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
 /* unary part, contexts advance with magnitude */
5146 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
 /* bypass-coded exponential suffix for large mvd */
5154 while( get_cabac_bypass( &h->cabac ) ) {
5158 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5163 if( get_cabac_bypass( &h->cabac ) )
5167 return get_cabac_bypass_sign( &h->cabac, -mvd );
/*
 * Computes the CABAC context index for the coded_block_flag of block
 * `idx` in category `cat`: nza/nzb come from the neighbours'
 * coded-block info (cbp bits for DC/luma-DC categories, the
 * non_zero_count cache otherwise); final ctx = f(nza,nzb) + 4*cat
 * (the intermediate combination lines are elided in this dump).
 */
5170 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5176 nza = h->left_cbp&0x100;
5177 nzb = h-> top_cbp&0x100;
5179 nza = (h->left_cbp>>(6+idx))&0x01;
5180 nzb = (h-> top_cbp>>(6+idx))&0x01;
5183 assert(cat == 1 || cat == 2 || cat == 4);
5184 nza = h->non_zero_count_cache[scan8[idx] - 1];
5185 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5194 return ctx + 4 * cat;
/*
 * Maps an 8x8-block scan position (0..62) to the context offset used for
 * the CABAC last_significant_coeff_flag; shared with the asm
 * implementations via DECLARE_ASM_CONST.
 */
5197 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5198 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5199 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5200 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5201 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5204 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5205 static const int significant_coeff_flag_offset[2][6] = {
5206 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5207 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5209 static const int last_coeff_flag_offset[2][6] = {
5210 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5211 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5213 static const int coeff_abs_level_m1_offset[6] = {
5214 227+0, 227+10, 227+20, 227+30, 227+39, 426
5216 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5217 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5218 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5219 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5220 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5221 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5222 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5223 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5224 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5226 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5227 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5228 * map node ctx => cabac ctx for level=1 */
5229 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5230 /* map node ctx => cabac ctx for level>1 */
5231 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5232 static const uint8_t coeff_abs_level_transition[2][8] = {
5233 /* update node ctx after decoding a level=1 */
5234 { 1, 2, 3, 3, 4, 5, 6, 7 },
5235 /* update node ctx after decoding a level>1 */
5236 { 4, 4, 4, 4, 5, 6, 7, 7 }
5242 int coeff_count = 0;
5245 uint8_t *significant_coeff_ctx_base;
5246 uint8_t *last_coeff_ctx_base;
5247 uint8_t *abs_level_m1_ctx_base;
5250 #define CABAC_ON_STACK
5252 #ifdef CABAC_ON_STACK
5255 cc.range = h->cabac.range;
5256 cc.low = h->cabac.low;
5257 cc.bytestream= h->cabac.bytestream;
5259 #define CC &h->cabac
5263 /* cat: 0-> DC 16x16 n = 0
5264 * 1-> AC 16x16 n = luma4x4idx
5265 * 2-> Luma4x4 n = luma4x4idx
5266 * 3-> DC Chroma n = iCbCr
5267 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5268 * 5-> Luma8x8 n = 4 * luma8x8idx
5271 /* read coded block flag */
5272 if( is_dc || cat != 5 ) {
5273 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5275 h->non_zero_count_cache[scan8[n]] = 0;
5277 #ifdef CABAC_ON_STACK
5278 h->cabac.range = cc.range ;
5279 h->cabac.low = cc.low ;
5280 h->cabac.bytestream= cc.bytestream;
5286 significant_coeff_ctx_base = h->cabac_state
5287 + significant_coeff_flag_offset[MB_FIELD][cat];
5288 last_coeff_ctx_base = h->cabac_state
5289 + last_coeff_flag_offset[MB_FIELD][cat];
5290 abs_level_m1_ctx_base = h->cabac_state
5291 + coeff_abs_level_m1_offset[cat];
5293 if( !is_dc && cat == 5 ) {
5294 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5295 for(last= 0; last < coefs; last++) { \
5296 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5297 if( get_cabac( CC, sig_ctx )) { \
5298 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5299 index[coeff_count++] = last; \
5300 if( get_cabac( CC, last_ctx ) ) { \
5306 if( last == max_coeff -1 ) {\
5307 index[coeff_count++] = last;\
5309 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5310 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5311 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5313 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5315 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5317 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5320 assert(coeff_count > 0);
5324 h->cbp_table[h->mb_xy] |= 0x100;
5326 h->cbp_table[h->mb_xy] |= 0x40 << n;
5329 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5331 assert( cat == 1 || cat == 2 || cat == 4 );
5332 h->non_zero_count_cache[scan8[n]] = coeff_count;
5337 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5339 int j= scantable[index[--coeff_count]];
5341 if( get_cabac( CC, ctx ) == 0 ) {
5342 node_ctx = coeff_abs_level_transition[0][node_ctx];
5344 block[j] = get_cabac_bypass_sign( CC, -1);
5346 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5350 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5351 node_ctx = coeff_abs_level_transition[1][node_ctx];
5353 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5357 if( coeff_abs >= 15 ) {
5359 while( get_cabac_bypass( CC ) ) {
5365 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5371 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5373 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5376 } while( coeff_count );
5377 #ifdef CABAC_ON_STACK
5378 h->cabac.range = cc.range ;
5379 h->cabac.low = cc.low ;
5380 h->cabac.bytestream= cc.bytestream;
5386 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5387 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5390 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5391 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5395 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5397 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5399 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5400 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5404 static inline void compute_mb_neighbors(H264Context *h)
5406 MpegEncContext * const s = &h->s;
5407 const int mb_xy = h->mb_xy;
5408 h->top_mb_xy = mb_xy - s->mb_stride;
5409 h->left_mb_xy[0] = mb_xy - 1;
5411 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5412 const int top_pair_xy = pair_xy - s->mb_stride;
5413 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5414 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5415 const int curr_mb_field_flag = MB_FIELD;
5416 const int bottom = (s->mb_y & 1);
5418 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5419 h->top_mb_xy -= s->mb_stride;
5421 if (!left_mb_field_flag == curr_mb_field_flag) {
5422 h->left_mb_xy[0] = pair_xy - 1;
5424 } else if (FIELD_PICTURE) {
5425 h->top_mb_xy -= s->mb_stride;
5431 * decodes a macroblock
5432 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5434 static int decode_mb_cabac(H264Context *h) {
5435 MpegEncContext * const s = &h->s;
5437 int mb_type, partition_count, cbp = 0;
5438 int dct8x8_allowed= h->pps.transform_8x8_mode;
5440 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5442 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5443 if( h->slice_type_nos != FF_I_TYPE ) {
5445 /* a skipped mb needs the aff flag from the following mb */
5446 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5447 predict_field_decoding_flag(h);
5448 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5449 skip = h->next_mb_skipped;
5451 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5452 /* read skip flags */
5454 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5455 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5456 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5457 if(!h->next_mb_skipped)
5458 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5463 h->cbp_table[mb_xy] = 0;
5464 h->chroma_pred_mode_table[mb_xy] = 0;
5465 h->last_qscale_diff = 0;
5472 if( (s->mb_y&1) == 0 )
5474 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5477 h->prev_mb_skipped = 0;
5479 compute_mb_neighbors(h);
5481 if( h->slice_type_nos == FF_B_TYPE ) {
5482 mb_type = decode_cabac_mb_type_b( h );
5484 partition_count= b_mb_type_info[mb_type].partition_count;
5485 mb_type= b_mb_type_info[mb_type].type;
5488 goto decode_intra_mb;
5490 } else if( h->slice_type_nos == FF_P_TYPE ) {
5491 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5493 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5494 /* P_L0_D16x16, P_8x8 */
5495 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5497 /* P_L0_D8x16, P_L0_D16x8 */
5498 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5500 partition_count= p_mb_type_info[mb_type].partition_count;
5501 mb_type= p_mb_type_info[mb_type].type;
5503 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5504 goto decode_intra_mb;
5507 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5508 if(h->slice_type == FF_SI_TYPE && mb_type)
5510 assert(h->slice_type_nos == FF_I_TYPE);
5512 partition_count = 0;
5513 cbp= i_mb_type_info[mb_type].cbp;
5514 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5515 mb_type= i_mb_type_info[mb_type].type;
5518 mb_type |= MB_TYPE_INTERLACED;
5520 h->slice_table[ mb_xy ]= h->slice_num;
5522 if(IS_INTRA_PCM(mb_type)) {
5525 // We assume these blocks are very rare so we do not optimize it.
5526 // FIXME The two following lines get the bitstream position in the cabac
5527 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5528 ptr= h->cabac.bytestream;
5529 if(h->cabac.low&0x1) ptr--;
5531 if(h->cabac.low&0x1FF) ptr--;
5534 // The pixels are stored in the same order as levels in h->mb array.
5535 memcpy(h->mb, ptr, 256); ptr+=256;
5537 memcpy(h->mb+128, ptr, 128); ptr+=128;
5540 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5542 // All blocks are present
5543 h->cbp_table[mb_xy] = 0x1ef;
5544 h->chroma_pred_mode_table[mb_xy] = 0;
5545 // In deblocking, the quantizer is 0
5546 s->current_picture.qscale_table[mb_xy]= 0;
5547 // All coeffs are present
5548 memset(h->non_zero_count[mb_xy], 16, 16);
5549 s->current_picture.mb_type[mb_xy]= mb_type;
5550 h->last_qscale_diff = 0;
5555 h->ref_count[0] <<= 1;
5556 h->ref_count[1] <<= 1;
5559 fill_caches(h, mb_type, 0);
5561 if( IS_INTRA( mb_type ) ) {
5563 if( IS_INTRA4x4( mb_type ) ) {
5564 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5565 mb_type |= MB_TYPE_8x8DCT;
5566 for( i = 0; i < 16; i+=4 ) {
5567 int pred = pred_intra_mode( h, i );
5568 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5569 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5572 for( i = 0; i < 16; i++ ) {
5573 int pred = pred_intra_mode( h, i );
5574 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5576 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5579 write_back_intra_pred_mode(h);
5580 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5582 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5583 if( h->intra16x16_pred_mode < 0 ) return -1;
5586 h->chroma_pred_mode_table[mb_xy] =
5587 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5589 pred_mode= check_intra_pred_mode( h, pred_mode );
5590 if( pred_mode < 0 ) return -1;
5591 h->chroma_pred_mode= pred_mode;
5593 } else if( partition_count == 4 ) {
5594 int i, j, sub_partition_count[4], list, ref[2][4];
5596 if( h->slice_type_nos == FF_B_TYPE ) {
5597 for( i = 0; i < 4; i++ ) {
5598 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5599 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5600 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5602 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5603 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5604 pred_direct_motion(h, &mb_type);
5605 h->ref_cache[0][scan8[4]] =
5606 h->ref_cache[1][scan8[4]] =
5607 h->ref_cache[0][scan8[12]] =
5608 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5609 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5610 for( i = 0; i < 4; i++ )
5611 if( IS_DIRECT(h->sub_mb_type[i]) )
5612 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5616 for( i = 0; i < 4; i++ ) {
5617 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5618 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5619 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5623 for( list = 0; list < h->list_count; list++ ) {
5624 for( i = 0; i < 4; i++ ) {
5625 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5626 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5627 if( h->ref_count[list] > 1 ){
5628 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5629 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5630 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5638 h->ref_cache[list][ scan8[4*i]+1 ]=
5639 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5644 dct8x8_allowed = get_dct8x8_allowed(h);
5646 for(list=0; list<h->list_count; list++){
5648 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5649 if(IS_DIRECT(h->sub_mb_type[i])){
5650 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5654 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5655 const int sub_mb_type= h->sub_mb_type[i];
5656 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5657 for(j=0; j<sub_partition_count[i]; j++){
5660 const int index= 4*i + block_width*j;
5661 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5662 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5663 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5665 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5666 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5667 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5669 if(IS_SUB_8X8(sub_mb_type)){
5671 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5673 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5676 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5678 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5679 }else if(IS_SUB_8X4(sub_mb_type)){
5680 mv_cache[ 1 ][0]= mx;
5681 mv_cache[ 1 ][1]= my;
5683 mvd_cache[ 1 ][0]= mx - mpx;
5684 mvd_cache[ 1 ][1]= my - mpy;
5685 }else if(IS_SUB_4X8(sub_mb_type)){
5686 mv_cache[ 8 ][0]= mx;
5687 mv_cache[ 8 ][1]= my;
5689 mvd_cache[ 8 ][0]= mx - mpx;
5690 mvd_cache[ 8 ][1]= my - mpy;
5692 mv_cache[ 0 ][0]= mx;
5693 mv_cache[ 0 ][1]= my;
5695 mvd_cache[ 0 ][0]= mx - mpx;
5696 mvd_cache[ 0 ][1]= my - mpy;
5699 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5700 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5701 p[0] = p[1] = p[8] = p[9] = 0;
5702 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5706 } else if( IS_DIRECT(mb_type) ) {
5707 pred_direct_motion(h, &mb_type);
5708 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5709 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5710 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5712 int list, mx, my, i, mpx, mpy;
5713 if(IS_16X16(mb_type)){
5714 for(list=0; list<h->list_count; list++){
5715 if(IS_DIR(mb_type, 0, list)){
5717 if(h->ref_count[list] > 1){
5718 ref= decode_cabac_mb_ref(h, list, 0);
5719 if(ref >= (unsigned)h->ref_count[list]){
5720 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5725 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5727 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5729 for(list=0; list<h->list_count; list++){
5730 if(IS_DIR(mb_type, 0, list)){
5731 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5733 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5734 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5735 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5737 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5738 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5740 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5743 else if(IS_16X8(mb_type)){
5744 for(list=0; list<h->list_count; list++){
5746 if(IS_DIR(mb_type, i, list)){
5748 if(h->ref_count[list] > 1){
5749 ref= decode_cabac_mb_ref( h, list, 8*i );
5750 if(ref >= (unsigned)h->ref_count[list]){
5751 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5756 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5758 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5761 for(list=0; list<h->list_count; list++){
5763 if(IS_DIR(mb_type, i, list)){
5764 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5765 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5766 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5767 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5769 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5770 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5772 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5773 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5778 assert(IS_8X16(mb_type));
5779 for(list=0; list<h->list_count; list++){
5781 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5783 if(h->ref_count[list] > 1){
5784 ref= decode_cabac_mb_ref( h, list, 4*i );
5785 if(ref >= (unsigned)h->ref_count[list]){
5786 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5791 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5793 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5796 for(list=0; list<h->list_count; list++){
5798 if(IS_DIR(mb_type, i, list)){
5799 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5800 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5801 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5803 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5804 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5805 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5807 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5808 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5815 if( IS_INTER( mb_type ) ) {
5816 h->chroma_pred_mode_table[mb_xy] = 0;
5817 write_back_motion( h, mb_type );
5820 if( !IS_INTRA16x16( mb_type ) ) {
5821 cbp = decode_cabac_mb_cbp_luma( h );
5823 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5826 h->cbp_table[mb_xy] = h->cbp = cbp;
5828 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5829 if( decode_cabac_mb_transform_size( h ) )
5830 mb_type |= MB_TYPE_8x8DCT;
5832 s->current_picture.mb_type[mb_xy]= mb_type;
5834 if( cbp || IS_INTRA16x16( mb_type ) ) {
5835 const uint8_t *scan, *scan8x8, *dc_scan;
5836 const uint32_t *qmul;
5839 if(IS_INTERLACED(mb_type)){
5840 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5841 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5842 dc_scan= luma_dc_field_scan;
5844 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5845 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5846 dc_scan= luma_dc_zigzag_scan;
5849 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5850 if( dqp == INT_MIN ){
5851 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5855 if(((unsigned)s->qscale) > 51){
5856 if(s->qscale<0) s->qscale+= 52;
5857 else s->qscale-= 52;
5859 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5860 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5862 if( IS_INTRA16x16( mb_type ) ) {
5864 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5865 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5868 qmul = h->dequant4_coeff[0][s->qscale];
5869 for( i = 0; i < 16; i++ ) {
5870 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5871 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5874 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5878 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5879 if( cbp & (1<<i8x8) ) {
5880 if( IS_8x8DCT(mb_type) ) {
5881 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5882 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5884 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5885 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5886 const int index = 4*i8x8 + i4x4;
5887 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5889 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5890 //STOP_TIMER("decode_residual")
5894 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5895 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5902 for( c = 0; c < 2; c++ ) {
5903 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5904 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5910 for( c = 0; c < 2; c++ ) {
5911 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5912 for( i = 0; i < 4; i++ ) {
5913 const int index = 16 + 4 * c + i;
5914 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5915 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5919 uint8_t * const nnz= &h->non_zero_count_cache[0];
5920 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5921 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5924 uint8_t * const nnz= &h->non_zero_count_cache[0];
5925 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5926 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5927 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5928 h->last_qscale_diff = 0;
5931 s->current_picture.qscale_table[mb_xy]= s->qscale;
5932 write_back_non_zero_count(h);
5935 h->ref_count[0] >>= 1;
5936 h->ref_count[1] >>= 1;
5943 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5944 const int index_a = qp + h->slice_alpha_c0_offset;
5945 const int alpha = (alpha_table+52)[index_a];
5946 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5950 tc[0] = (tc0_table+52)[index_a][bS[0]];
5951 tc[1] = (tc0_table+52)[index_a][bS[1]];
5952 tc[2] = (tc0_table+52)[index_a][bS[2]];
5953 tc[3] = (tc0_table+52)[index_a][bS[3]];
5954 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5956 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
5959 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5960 const int index_a = qp + h->slice_alpha_c0_offset;
5961 const int alpha = (alpha_table+52)[index_a];
5962 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5966 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
5967 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
5968 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
5969 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
5970 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5972 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5976 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5978 for( i = 0; i < 16; i++, pix += stride) {
5984 int bS_index = (i >> 1);
5987 bS_index |= (i & 1);
5990 if( bS[bS_index] == 0 ) {
5994 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5995 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5996 alpha = (alpha_table+52)[index_a];
5997 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5999 if( bS[bS_index] < 4 ) {
6000 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6001 const int p0 = pix[-1];
6002 const int p1 = pix[-2];
6003 const int p2 = pix[-3];
6004 const int q0 = pix[0];
6005 const int q1 = pix[1];
6006 const int q2 = pix[2];
6008 if( FFABS( p0 - q0 ) < alpha &&
6009 FFABS( p1 - p0 ) < beta &&
6010 FFABS( q1 - q0 ) < beta ) {
6014 if( FFABS( p2 - p0 ) < beta ) {
6015 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6018 if( FFABS( q2 - q0 ) < beta ) {
6019 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6023 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6024 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6025 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6026 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6029 const int p0 = pix[-1];
6030 const int p1 = pix[-2];
6031 const int p2 = pix[-3];
6033 const int q0 = pix[0];
6034 const int q1 = pix[1];
6035 const int q2 = pix[2];
6037 if( FFABS( p0 - q0 ) < alpha &&
6038 FFABS( p1 - p0 ) < beta &&
6039 FFABS( q1 - q0 ) < beta ) {
6041 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6042 if( FFABS( p2 - p0 ) < beta)
6044 const int p3 = pix[-4];
6046 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6047 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6048 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6051 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6053 if( FFABS( q2 - q0 ) < beta)
6055 const int q3 = pix[3];
6057 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6058 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6059 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6062 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6066 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6067 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6069 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6074 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6076 for( i = 0; i < 8; i++, pix += stride) {
6084 if( bS[bS_index] == 0 ) {
6088 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6089 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6090 alpha = (alpha_table+52)[index_a];
6091 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6093 if( bS[bS_index] < 4 ) {
6094 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6095 const int p0 = pix[-1];
6096 const int p1 = pix[-2];
6097 const int q0 = pix[0];
6098 const int q1 = pix[1];
6100 if( FFABS( p0 - q0 ) < alpha &&
6101 FFABS( p1 - p0 ) < beta &&
6102 FFABS( q1 - q0 ) < beta ) {
6103 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6105 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6106 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6107 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6110 const int p0 = pix[-1];
6111 const int p1 = pix[-2];
6112 const int q0 = pix[0];
6113 const int q1 = pix[1];
6115 if( FFABS( p0 - q0 ) < alpha &&
6116 FFABS( p1 - p0 ) < beta &&
6117 FFABS( q1 - q0 ) < beta ) {
6119 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6120 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6121 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6127 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6128 const int index_a = qp + h->slice_alpha_c0_offset;
6129 const int alpha = (alpha_table+52)[index_a];
6130 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6134 tc[0] = (tc0_table+52)[index_a][bS[0]];
6135 tc[1] = (tc0_table+52)[index_a][bS[1]];
6136 tc[2] = (tc0_table+52)[index_a][bS[2]];
6137 tc[3] = (tc0_table+52)[index_a][bS[3]];
6138 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6140 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6144 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6145 const int index_a = qp + h->slice_alpha_c0_offset;
6146 const int alpha = (alpha_table+52)[index_a];
6147 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6151 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6152 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6153 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6154 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6155 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6157 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6161 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6162 MpegEncContext * const s = &h->s;
6163 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6165 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6169 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6170 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6171 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6172 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6173 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6176 assert(!FRAME_MBAFF);
6178 mb_type = s->current_picture.mb_type[mb_xy];
6179 qp = s->current_picture.qscale_table[mb_xy];
6180 qp0 = s->current_picture.qscale_table[mb_xy-1];
6181 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6182 qpc = get_chroma_qp( h, 0, qp );
6183 qpc0 = get_chroma_qp( h, 0, qp0 );
6184 qpc1 = get_chroma_qp( h, 0, qp1 );
6185 qp0 = (qp + qp0 + 1) >> 1;
6186 qp1 = (qp + qp1 + 1) >> 1;
6187 qpc0 = (qpc + qpc0 + 1) >> 1;
6188 qpc1 = (qpc + qpc1 + 1) >> 1;
6189 qp_thresh = 15 - h->slice_alpha_c0_offset;
6190 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6191 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6194 if( IS_INTRA(mb_type) ) {
6195 int16_t bS4[4] = {4,4,4,4};
6196 int16_t bS3[4] = {3,3,3,3};
6197 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6198 if( IS_8x8DCT(mb_type) ) {
6199 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6200 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6201 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6202 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6204 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6205 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6206 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6207 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6208 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6209 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6210 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6211 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6213 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6214 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6215 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6216 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6217 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6218 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6219 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6220 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6223 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6224 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6226 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6228 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6230 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6231 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6232 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6233 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6235 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6236 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6237 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6238 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6240 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6241 bSv[0][0] = 0x0004000400040004ULL;
6242 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6243 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6245 #define FILTER(hv,dir,edge)\
6246 if(bSv[dir][edge]) {\
6247 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6249 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6250 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6256 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Deblock one direction of a macroblock's edges: dir==0 filters the
 * vertical edges (left neighbour is mb_xy-1), dir==1 the horizontal edges
 * (top neighbour is h->top_mb_xy).  Derives a boundary strength bS per
 * 4-pixel edge segment and applies the filter_mb_edge{v,h} /
 * filter_mb_edgec{v,h} helpers to luma and both chroma planes.
 * NOTE(review): this extract is missing lines of the original function;
 * the comments below describe only the code that is visible here.
 */
6276 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6277 MpegEncContext * const s = &h->s;
/* neighbouring macroblock in the filtering direction, and its type */
6279 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6280 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps reference indices to frame numbers for the current slice;
 * ref2frmm is the same table for the neighbouring MB's slice (the two
 * slices may use different reference lists). */
6281 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6282 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
/* start==1: skip the MB-boundary edge (neighbour unavailable, 0xFFFF slice) */
6283 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
/* a skipped 16x16 MB needs only the MB-boundary edge checked */
6285 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6286 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6287 // how often to recheck mv-based bS when iterating between edges
6288 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6289 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6290 // how often to recheck mv-based bS when iterating along each edge
6291 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
/* MB-boundary edge already handled by the caller (MBAFF special case) */
6293 if (first_vertical_edge_done) {
/* deblocking_filter==2: do not filter across slice boundaries */
6297 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6300 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6301 && !IS_INTERLACED(mb_type)
6302 && IS_INTERLACED(mbm_type)
6304 // This is a special case in the norm where the filtering must
6305 // be done twice (one each of the field) even if we are in a
6306 // frame macroblock.
6308 static const int nnz_idx[4] = {4,5,6,3};
6309 unsigned int tmp_linesize = 2 * linesize;
6310 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6311 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter against each field MB of the interlaced top pair */
6316 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6317 if( IS_INTRA(mb_type) ||
6318 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6319 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6321 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6322 for( i = 0; i < 4; i++ ) {
6323 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6324 mbn_nnz[nnz_idx[i]] != 0 )
6330 // Do not use s->qscale as luma quantizer because it has not the same
6331 // value in IPCM macroblocks.
6332 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6333 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6334 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6335 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6336 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6337 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6338 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6339 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* regular path: iterate over the edges of this direction */
6346 for( edge = start; edge < edges; edge++ ) {
6347 /* mbn_xy: neighbor macroblock */
6348 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6349 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6350 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
/* with the 8x8 transform, odd 4-pixel edges are not filtered */
6354 if( (edge&1) && IS_8x8DCT(mb_type) )
6357 if( IS_INTRA(mb_type) ||
6358 IS_INTRA(mbn_type) ) {
6361 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6362 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6371 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inner edge inside a large partition: mv-based bS known to be 0 */
6376 if( edge & mask_edge ) {
6377 bS[0] = bS[1] = bS[2] = bS[3] = 0;
/* frame/field mix in MBAFF: spec mandates bS=1 on this edge */
6380 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6381 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* whole edge lies on one partition pair: a single mv/ref check suffices */
6384 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6385 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6386 int bn_idx= b_idx - (dir ? 8:1);
6389 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6390 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6391 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6392 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare list0 against the other side's list1 and v.v. */
6395 if(h->slice_type_nos == FF_B_TYPE && v){
6397 for( l = 0; !v && l < 2; l++ ) {
6399 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6400 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6401 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6405 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4x4 segment from coded coefficients, then
 * from reference/motion-vector differences */
6411 for( i = 0; i < 4; i++ ) {
6412 int x = dir == 0 ? edge : i;
6413 int y = dir == 0 ? i : edge;
6414 int b_idx= 8 + 4 + x + 8*y;
6415 int bn_idx= b_idx - (dir ? 8:1);
6417 if( h->non_zero_count_cache[b_idx] |
6418 h->non_zero_count_cache[bn_idx] ) {
6424 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6425 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6426 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6427 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6433 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6435 for( l = 0; l < 2; l++ ) {
6437 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6438 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6439 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* nothing to filter on this edge */
6448 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6453 // Do not use s->qscale as luma quantizer because it has not the same
6454 // value in IPCM macroblocks.
6455 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6456 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6457 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6458 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* dir==0: vertical edges; chroma has half resolution, so only even edges */
6460 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6461 if( (edge&1) == 0 ) {
6462 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6463 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6464 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6465 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* dir==1: horizontal edges */
6468 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6469 if( (edge&1) == 0 ) {
6470 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6471 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6472 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6473 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Apply the in-loop deblocking filter to one macroblock.
 * Early-outs when qp is below a threshold at which the filter cannot
 * change any pixel, fixes up non_zero_count_cache for CAVLC with the 8x8
 * transform, handles the MBAFF special case for the first vertical edge
 * (8 bS values, two qp pairs), then filters both directions via
 * filter_mb_dir().
 * NOTE(review): this extract is missing lines of the original function;
 * comments describe only the visible code.
 */
6479 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6480 MpegEncContext * const s = &h->s;
6481 const int mb_xy= mb_x + mb_y*s->mb_stride;
6482 const int mb_type = s->current_picture.mb_type[mb_xy];
/* field MBs use a tighter vertical mv threshold (half-pel units differ) */
6483 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6484 int first_vertical_edge_done = 0;
6487 //for sufficiently low qp, filtering wouldn't do anything
6488 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6490 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6491 int qp = s->current_picture.qscale_table[mb_xy];
6493 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6494 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6499 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6500 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6501 int top_type, left_type[2];
6502 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6503 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6504 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the neighbour NNZ entries from the cbp of the neighbour MBs */
6506 if(IS_8x8DCT(top_type)){
6507 h->non_zero_count_cache[4+8*0]=
6508 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6509 h->non_zero_count_cache[6+8*0]=
6510 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6512 if(IS_8x8DCT(left_type[0])){
6513 h->non_zero_count_cache[3+8*1]=
6514 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6516 if(IS_8x8DCT(left_type[1])){
6517 h->non_zero_count_cache[3+8*3]=
6518 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
/* current MB: one cbp bit per 8x8 block drives all four 4x4 NNZ entries */
6521 if(IS_8x8DCT(mb_type)){
6522 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6523 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6525 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6526 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6528 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6529 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6531 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6532 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6537 // left mb is in picture
6538 && h->slice_table[mb_xy-1] != 0xFFFF
6539 // and current and left pair do not have the same interlaced type
6540 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6541 // and left mb is in the same slice if deblocking_filter == 2
6542 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6543 /* First vertical edge is different in MBAFF frames
6544 * There are 8 different bS to compute and 2 different Qp
6546 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6547 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6552 int mb_qp, mbn0_qp, mbn1_qp;
6554 first_vertical_edge_done = 1;
/* intra MBs force maximum boundary strength on the MB edge */
6556 if( IS_INTRA(mb_type) )
6557 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6559 for( i = 0; i < 8; i++ ) {
6560 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6562 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6564 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6565 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6566 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6568 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
/* average the qp of the MBs on each side of the edge, per plane */
6575 mb_qp = s->current_picture.qscale_table[mb_xy];
6576 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6577 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6578 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6579 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6580 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6581 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6582 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6583 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6584 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6585 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6586 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6587 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6590 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6591 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6592 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6593 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6594 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
/* filter vertical (dir==0) then horizontal (dir==1) edges */
6598 for( dir = 0; dir < 2; dir++ )
6599 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6601 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6602 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
/**
 * Decode the macroblocks of one slice (thread worker entry; arg is an
 * H264Context** as passed by the execute API).
 * Selects the CABAC or CAVLC macroblock decoder from the PPS, iterates
 * over macroblocks, reports decoded/errored regions to the error
 * resilience layer via ff_er_add_slice(), and draws completed MB rows.
 * Returns 0 on normal slice end, -1 on error.
 * NOTE(review): this extract is missing lines of the original function;
 * comments describe only the visible code.
 */
6606 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6607 H264Context *h = *(void**)arg;
6608 MpegEncContext * const s = &h->s;
/* in partitioned frames only AC end/error markers are reported */
6609 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6613 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6614 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
/* ---- CABAC entropy decoding ---- */
6616 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned */
6620 align_get_bits( &s->gb );
6623 ff_init_cabac_states( &h->cabac);
6624 ff_init_cabac_decoder( &h->cabac,
6625 s->gb.buffer + get_bits_count(&s->gb)/8,
6626 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6627 /* calculate pre-state */
6628 for( i= 0; i < 460; i++ ) {
6630 if( h->slice_type_nos == FF_I_TYPE )
6631 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6633 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
/* pack (state, MPS) into one byte as used by the CABAC decoder */
6636 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6638 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6643 int ret = decode_mb_cabac(h);
6645 //STOP_TIMER("decode_mb_cabac")
6647 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6649 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6652 ret = decode_mb_cabac(h);
6654 if(ret>=0) hl_decode_mb(h);
6657 eos = get_cabac_terminate( &h->cabac );
/* bytestream overrun beyond the 2-byte slack => corrupt slice */
6659 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6660 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6661 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6665 if( ++s->mb_x >= s->mb_width ) {
6667 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6669 if(FIELD_OR_MBAFF_PICTURE) {
6674 if( eos || s->mb_y >= s->mb_height ) {
6675 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6676 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* ---- CAVLC entropy decoding ---- */
6683 int ret = decode_mb_cavlc(h);
6685 if(ret>=0) hl_decode_mb(h);
6687 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6689 ret = decode_mb_cavlc(h);
6691 if(ret>=0) hl_decode_mb(h);
6696 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6697 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6702 if(++s->mb_x >= s->mb_width){
6704 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6706 if(FIELD_OR_MBAFF_PICTURE) {
6709 if(s->mb_y >= s->mb_height){
6710 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* slice consumed exactly => clean end, otherwise report an error span */
6712 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6713 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6717 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6724 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6725 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6726 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6727 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6731 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): plain decode_mb() loop below — how it is reached is not
 * visible in this extract (missing lines); verify against the full file. */
6740 for(;s->mb_y < s->mb_height; s->mb_y++){
6741 for(;s->mb_x < s->mb_width; s->mb_x++){
6742 int ret= decode_mb(h);
6747 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6748 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6753 if(++s->mb_x >= s->mb_width){
6755 if(++s->mb_y >= s->mb_height){
6756 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6757 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6761 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6768 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6769 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6770 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6774 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6781 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6784 return -1; //not reached
/**
 * Parse a picture timing SEI message (H.264 Annex D).
 * Reads cpb_removal_delay/dpb_output_delay when HRD parameters are present
 * in the SPS, and pic_struct plus the associated clock timestamps when
 * pic_struct_present_flag is set; timestamp contents are skipped, only
 * h->sei_pic_struct and the delays are stored.
 */
6787 static int decode_picture_timing(H264Context *h){
6788 MpegEncContext * const s = &h->s;
6789 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6790 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6791 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6793 if(h->sps.pic_struct_present_flag){
6794 unsigned int i, num_clock_ts;
6795 h->sei_pic_struct = get_bits(&s->gb, 4);
/* reject values beyond the last defined pic_struct */
6797 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
/* number of clock timestamps is a function of pic_struct */
6800 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6802 for (i = 0 ; i < num_clock_ts ; i++){
6803 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6804 unsigned int full_timestamp_flag;
6805 skip_bits(&s->gb, 2); /* ct_type */
6806 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6807 skip_bits(&s->gb, 5); /* counting_type */
6808 full_timestamp_flag = get_bits(&s->gb, 1);
6809 skip_bits(&s->gb, 1); /* discontinuity_flag */
6810 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6811 skip_bits(&s->gb, 8); /* n_frames */
6812 if(full_timestamp_flag){
6813 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6814 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6815 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6817 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6818 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6819 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6820 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6821 if(get_bits(&s->gb, 1)) /* hours_flag */
6822 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6826 if(h->sps.time_offset_length > 0)
6827 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
/**
 * Parse an unregistered user data SEI message of 'size' bytes.
 * Copies up to sizeof(user_data)-1 bytes (16-byte UUID prefix plus text)
 * and scans the text for an x264 version banner, storing the build number
 * in h->x264_build; any remaining payload bytes are skipped.
 */
6834 static int decode_unregistered_user_data(H264Context *h, int size){
6835 MpegEncContext * const s = &h->s;
6836 uint8_t user_data[16+256];
6842 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6843 user_data[i]= get_bits(&s->gb, 8);
/* text starts after the 16-byte UUID; match the x264 banner prefix */
6847 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6848 if(e==1 && build>=0)
6849 h->x264_build= build;
6851 if(s->avctx->debug & FF_DEBUG_BUGS)
6852 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip whatever part of the payload did not fit in the buffer */
6855 skip_bits(&s->gb, 8);
/**
 * Parse a recovery point SEI message: store the recovery frame count and
 * skip the exact_match / broken_link / changing_slice_group_idc bits.
 */
6860 static int decode_recovery_point(H264Context *h){
6861 MpegEncContext * const s = &h->s;
6863 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6864 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
/**
 * Parse a buffering period SEI message.
 * Validates the referenced SPS id, then reads initial_cpb_removal_delay
 * per scheduler selection index for the NAL and/or VCL HRD (the matching
 * offset fields are skipped) and marks the message as present.
 */
6869 static int decode_buffering_period(H264Context *h){
6870 MpegEncContext * const s = &h->s;
6871 unsigned int sps_id;
6875 sps_id = get_ue_golomb_31(&s->gb);
6876 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6877 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6880 sps = h->sps_buffers[sps_id];
6882 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6883 if (sps->nal_hrd_parameters_present_flag) {
6884 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6885 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6886 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6889 if (sps->vcl_hrd_parameters_present_flag) {
6890 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6891 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6892 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6896 h->sei_buffering_period_present = 1;
/**
 * Parse an SEI NAL unit: iterate over the contained SEI messages, decode
 * the ones we understand and skip the rest by their declared size.
 * Both type and size use 0xFF-extension coding (each 0xFF byte adds 255).
 */
6900 static int decode_sei(H264Context *h){
6901 MpegEncContext * const s = &h->s;
/* at least type+size (two bytes) must remain */
6903 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6908 type+= show_bits(&s->gb, 8);
6909 }while(get_bits(&s->gb, 8) == 255);
6913 size+= show_bits(&s->gb, 8);
6914 }while(get_bits(&s->gb, 8) == 255);
/* dispatch on the SEI message type */
6917 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6918 if(decode_picture_timing(h) < 0)
6921 case SEI_TYPE_USER_DATA_UNREGISTERED:
6922 if(decode_unregistered_user_data(h, size) < 0)
6925 case SEI_TYPE_RECOVERY_POINT:
6926 if(decode_recovery_point(h) < 0)
6929 case SEI_BUFFERING_PERIOD:
6930 if(decode_buffering_period(h) < 0)
/* unknown message: skip its payload entirely */
6934 skip_bits(&s->gb, 8*size);
6937 //FIXME check bits here
6938 align_get_bits(&s->gb);
/**
 * Parse HRD (hypothetical reference decoder) parameters from the VUI.
 * Reads cpb_count (rejected when > 32), skips the per-CPB bit-rate /
 * buffer-size / cbr fields, and stores the delay-field bit lengths and
 * cpb_cnt in the SPS for later SEI parsing.
 * Returns a negative value on invalid cpb_count.
 */
6944 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6945 MpegEncContext * const s = &h->s;
6947 cpb_count = get_ue_golomb_31(&s->gb) + 1;
6949 if(cpb_count > 32U){
6950 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6954 get_bits(&s->gb, 4); /* bit_rate_scale */
6955 get_bits(&s->gb, 4); /* cpb_size_scale */
6956 for(i=0; i<cpb_count; i++){
6957 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6958 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6959 get_bits1(&s->gb); /* cbr_flag */
/* field lengths used by buffering-period / picture-timing SEI parsing */
6961 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6962 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6963 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6964 sps->time_offset_length = get_bits(&s->gb, 5);
6965 sps->cpb_cnt = cpb_count;
/**
 * Parse the VUI (video usability information) section of an SPS:
 * sample aspect ratio, overscan / video-signal / chroma-location info
 * (values mostly skipped), timing info, optional NAL and VCL HRD
 * parameters, pic_struct flag, and bitstream restrictions, where
 * num_reorder_frames is validated (> 16 rejected).
 */
6969 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6970 MpegEncContext * const s = &h->s;
6971 int aspect_ratio_info_present_flag;
6972 unsigned int aspect_ratio_idc;
6974 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6976 if( aspect_ratio_info_present_flag ) {
6977 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR carries explicit numerator/denominator; otherwise use the
 * fixed table of predefined aspect ratios */
6978 if( aspect_ratio_idc == EXTENDED_SAR ) {
6979 sps->sar.num= get_bits(&s->gb, 16);
6980 sps->sar.den= get_bits(&s->gb, 16);
6981 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6982 sps->sar= pixel_aspect[aspect_ratio_idc];
6984 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6991 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6993 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6994 get_bits1(&s->gb); /* overscan_appropriate_flag */
6997 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6998 get_bits(&s->gb, 3); /* video_format */
6999 get_bits1(&s->gb); /* video_full_range_flag */
7000 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7001 get_bits(&s->gb, 8); /* colour_primaries */
7002 get_bits(&s->gb, 8); /* transfer_characteristics */
7003 get_bits(&s->gb, 8); /* matrix_coefficients */
7007 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7008 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7009 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7012 sps->timing_info_present_flag = get_bits1(&s->gb);
7013 if(sps->timing_info_present_flag){
7014 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7015 sps->time_scale = get_bits_long(&s->gb, 32);
7016 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters may appear twice: once for NAL, once for VCL */
7019 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7020 if(sps->nal_hrd_parameters_present_flag)
7021 if(decode_hrd_parameters(h, sps) < 0)
7023 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7024 if(sps->vcl_hrd_parameters_present_flag)
7025 if(decode_hrd_parameters(h, sps) < 0)
7027 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7028 get_bits1(&s->gb); /* low_delay_hrd_flag */
7029 sps->pic_struct_present_flag = get_bits1(&s->gb);
7031 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7032 if(sps->bitstream_restriction_flag){
7033 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7034 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7035 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7036 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7037 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7038 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7039 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7041 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7042 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
/**
 * Parse one scaling list of 'size' (16 or 64) coefficients in zigzag order.
 * If the list is not present in the bitstream, 'fallback_list' is copied;
 * a first delta that makes 'next' zero selects the JVT default 'jvt_list';
 * otherwise signed deltas accumulate mod 256, and a zero delta repeats the
 * last value for the remaining positions.
 */
7050 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7051 const uint8_t *jvt_list, const uint8_t *fallback_list){
7052 MpegEncContext * const s = &h->s;
7053 int i, last = 8, next = 8;
/* 4x4 lists use the H.264 zigzag, 8x8 lists the generic one */
7054 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7055 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7056 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7058 for(i=0;i<size;i++){
7060 next = (last + get_se_golomb(&s->gb)) & 0xff;
7061 if(!i && !next){ /* matrix not written, we use the preset one */
7062 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means: keep repeating 'last' for the rest of the list */
7065 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * The fallback for the first list of each class is either the SPS matrix
 * (when parsing a PPS after an SPS that transmitted matrices) or the spec
 * default; subsequent 4x4 lists fall back to the previous list of the same
 * intra/inter class.  8x8 lists are present only for an SPS or when the
 * PPS enables the 8x8 transform.
 */
7069 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7070 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7071 MpegEncContext * const s = &h->s;
7072 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7073 const uint8_t *fallback[4] = {
7074 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7075 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7076 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7077 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq_scaling_matrix_present_flag / pic_scaling_matrix_present_flag */
7079 if(get_bits1(&s->gb)){
7080 sps->scaling_matrix_present |= is_sps;
7081 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7082 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7083 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7084 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7085 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7086 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7087 if(is_sps || pps->transform_8x8_mode){
7088 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7089 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/**
 * Parse a sequence parameter set NAL unit and store it in h->sps_buffers.
 * Validates sps_id, poc_cycle_length, ref_frame_count, macroblock
 * dimensions and cropping; High-profile (>=100) streams additionally carry
 * chroma format, bit depths, transform bypass and scaling matrices.
 * NOTE(review): this extract is missing lines of the original function
 * (some error paths and the return); comments cover only the visible code.
 */
7094 static inline int decode_seq_parameter_set(H264Context *h){
7095 MpegEncContext * const s = &h->s;
7096 int profile_idc, level_idc;
7097 unsigned int sps_id;
7101 profile_idc= get_bits(&s->gb, 8);
7102 get_bits1(&s->gb); //constraint_set0_flag
7103 get_bits1(&s->gb); //constraint_set1_flag
7104 get_bits1(&s->gb); //constraint_set2_flag
7105 get_bits1(&s->gb); //constraint_set3_flag
7106 get_bits(&s->gb, 4); // reserved
7107 level_idc= get_bits(&s->gb, 8);
7108 sps_id= get_ue_golomb_31(&s->gb);
7110 if(sps_id >= MAX_SPS_COUNT) {
7111 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7114 sps= av_mallocz(sizeof(SPS));
7118 sps->profile_idc= profile_idc;
7119 sps->level_idc= level_idc;
/* flat default matrices (all 16) until overridden by the bitstream */
7121 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7122 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7123 sps->scaling_matrix_present = 0;
7125 if(sps->profile_idc >= 100){ //high profile
7126 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7127 if(sps->chroma_format_idc == 3)
7128 sps->residual_color_transform_flag = get_bits1(&s->gb);
7129 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7130 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7131 sps->transform_bypass = get_bits1(&s->gb);
7132 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
/* non-high profiles imply 4:2:0 */
7134 sps->chroma_format_idc= 1;
7137 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7138 sps->poc_type= get_ue_golomb_31(&s->gb);
7140 if(sps->poc_type == 0){ //FIXME #define
7141 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7142 } else if(sps->poc_type == 1){//FIXME #define
7143 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7144 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7145 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7146 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7148 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7149 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7153 for(i=0; i<sps->poc_cycle_length; i++)
7154 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7155 }else if(sps->poc_type != 2){
7156 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7160 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7161 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7162 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7165 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7166 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7167 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7168 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7169 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7170 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7174 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7175 if(!sps->frame_mbs_only_flag)
7176 sps->mb_aff= get_bits1(&s->gb);
7180 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7182 #ifndef ALLOW_INTERLACE
7184 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7186 sps->crop= get_bits1(&s->gb);
7188 sps->crop_left = get_ue_golomb(&s->gb);
7189 sps->crop_right = get_ue_golomb(&s->gb);
7190 sps->crop_top = get_ue_golomb(&s->gb);
7191 sps->crop_bottom= get_ue_golomb(&s->gb);
7192 if(sps->crop_left || sps->crop_top){
7193 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7195 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7196 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7202 sps->crop_bottom= 0;
7205 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7206 if( sps->vui_parameters_present_flag )
7207 decode_vui_parameters(h, sps);
7209 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7210 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7211 sps_id, sps->profile_idc, sps->level_idc,
7213 sps->ref_frame_count,
7214 sps->mb_width, sps->mb_height,
7215 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7216 sps->direct_8x8_inference_flag ? "8B8" : "",
7217 sps->crop_left, sps->crop_right,
7218 sps->crop_top, sps->crop_bottom,
7219 sps->vui_parameters_present_flag ? "VUI" : "",
7220 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* replace any previously stored SPS with the same id */
7224 av_free(h->sps_buffers[sps_id]);
7225 h->sps_buffers[sps_id]= sps;
/* Fill the chroma qp lookup table for chroma plane t: each luma qp i plus
 * the per-plane index offset, clipped to [0,51], mapped through chroma_qp. */
7234 build_qp_table(PPS *pps, int t, int index)
7237 for(i = 0; i < 52; i++)
7238 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a Picture Parameter Set (PPS) NAL unit from the bitstream reader
 * s->gb and store it in h->pps_buffers[pps_id].
 *
 * @param h          decoder context (bitstream reader lives in h->s.gb)
 * @param bit_length length of the RBSP payload in bits; used to detect the
 *                   optional trailing extension (transform_8x8_mode etc.)
 * NOTE(review): several lines of this function (error-path returns, the
 * declaration of 'pps', loop bodies of the FMO switch) are missing from
 * this excerpt; behavior notes below describe only what is visible. */
7241 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7242 MpegEncContext * const s = &h->s;
7243 unsigned int pps_id= get_ue_golomb(&s->gb);
/* Reject out-of-range pps_id before using it as an array index. */
7246 if(pps_id >= MAX_PPS_COUNT) {
7247 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7251 pps= av_mallocz(sizeof(PPS));
/* The referenced SPS must already have been decoded. */
7254 pps->sps_id= get_ue_golomb_31(&s->gb);
7255 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7256 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7260 pps->cabac= get_bits1(&s->gb);
7261 pps->pic_order_present= get_bits1(&s->gb);
7262 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
/* More than one slice group means FMO, which this decoder does not
 * implement; the syntax below is only logged, not honored.  The pipe
 * tables that follow are quoted H.264 spec syntax kept as reference. */
7263 if(pps->slice_group_count > 1 ){
7264 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7265 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7266 switch(pps->mb_slice_group_map_type){
7269 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7270 | run_length[ i ] |1 |ue(v) |
7275 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7277 | top_left_mb[ i ] |1 |ue(v) |
7278 | bottom_right_mb[ i ] |1 |ue(v) |
7286 | slice_group_change_direction_flag |1 |u(1) |
7287 | slice_group_change_rate_minus1 |1 |ue(v) |
7292 | slice_group_id_cnt_minus1 |1 |ue(v) |
7293 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7295 | slice_group_id[ i ] |1 |u(v) |
/* num_ref_idx_l0/l1_active_minus1 + 1; decoder supports at most 32 refs. */
7300 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7301 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7302 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7303 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7307 pps->weighted_pred= get_bits1(&s->gb);
7308 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
/* QP values are signalled relative to 26 per the spec. */
7309 pps->init_qp= get_se_golomb(&s->gb) + 26;
7310 pps->init_qs= get_se_golomb(&s->gb) + 26;
7311 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7312 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7313 pps->constrained_intra_pred= get_bits1(&s->gb);
7314 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7316 pps->transform_8x8_mode= 0;
7317 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* Start from the SPS scaling matrices; PPS-level matrices may override
 * them in the extension parsed below. */
7318 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7319 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
/* Bits remain => High-profile PPS extension is present. */
7321 if(get_bits_count(&s->gb) < bit_length){
7322 pps->transform_8x8_mode= get_bits1(&s->gb);
7323 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7324 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7326 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* Precompute luma->chroma QP tables for both chroma components. */
7329 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7330 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7331 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7332 h->pps.chroma_qp_diff= 1;
7334 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7335 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7336 pps_id, pps->sps_id,
7337 pps->cabac ? "CABAC" : "CAVLC",
7338 pps->slice_group_count,
7339 pps->ref_count[0], pps->ref_count[1],
7340 pps->weighted_pred ? "weighted" : "",
7341 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7342 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7343 pps->constrained_intra_pred ? "CONSTR" : "",
7344 pps->redundant_pic_cnt_present ? "REDU" : "",
7345 pps->transform_8x8_mode ? "8x8DCT" : ""
/* Replace any previously stored PPS with the same id. */
7349 av_free(h->pps_buffers[pps_id]);
7350 h->pps_buffers[pps_id]= pps;
7358 * Call decode_slice() for each context.
7360 * @param h h264 master context
7361 * @param context_count number of contexts to execute
/* Run decode_slice() for each queued slice context.  With a single context
 * the slice is decoded inline; with several, avctx->execute() fans them out
 * to the worker threads and the master context is then resynchronized from
 * the last thread context.
 * NOTE(review): some lines (e.g. the early-return body under the VDPAU
 * capability check, the closing braces) are missing from this excerpt. */
7363 static void execute_decode_slices(H264Context *h, int context_count){
7364 MpegEncContext * const s = &h->s;
7365 AVCodecContext * const avctx= s->avctx;
/* VDPAU hardware decoding bypasses the software slice decoder. */
7369 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7371 if(context_count == 1) {
7372 decode_slice(avctx, &h);
/* Prepare per-thread contexts before parallel execution. */
7374 for(i = 1; i < context_count; i++) {
7375 hx = h->thread_context[i];
7376 hx->s.error_recognition = avctx->error_recognition;
7377 hx->s.error_count = 0;
7380 avctx->execute(avctx, (void *)decode_slice,
7381 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7383 /* pull back stuff from slices to master context */
7384 hx = h->thread_context[context_count - 1];
7385 s->mb_x = hx->s.mb_x;
7386 s->mb_y = hx->s.mb_y;
7387 s->dropable = hx->s.dropable;
7388 s->picture_structure = hx->s.picture_structure;
/* Accumulate error counts from all worker contexts into the master. */
7389 for(i = 1; i < context_count; i++)
7390 h->s.error_count += h->thread_context[i]->s.error_count;
/* Split the input buffer into NAL units and dispatch each to the proper
 * parser (slice header/data, SPS, PPS, SEI, ...).  Handles both Annex-B
 * start-code framing and AVC length-prefixed framing (h->is_avc).
 * Slices are queued into thread contexts and flushed through
 * execute_decode_slices() whenever max_contexts are pending.
 *
 * @return number of bytes consumed (per the original API); NOTE(review):
 * the return statements and several control-flow lines are missing from
 * this excerpt, so the exact error paths cannot be confirmed here. */
7395 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7396 MpegEncContext * const s = &h->s;
7397 AVCodecContext * const avctx= s->avctx;
7399 H264Context *hx; ///< thread context
7400 int context_count = 0;
7402 h->max_contexts = avctx->thread_count;
/* Debug dump of the first input bytes (enabled only under a debug flag
 * that falls outside this excerpt). */
7405 for(i=0; i<50; i++){
7406 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CHUNKS mode a call always starts a fresh access unit. */
7409 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7410 h->current_slice = 0;
7411 if (!s->first_field)
7412 s->current_picture_ptr= NULL;
7424 if(buf_index >= buf_size) break;
/* AVC framing: read the big-endian NAL size prefix. */
7426 for(i = 0; i < h->nal_length_size; i++)
7427 nalsize = (nalsize << 8) | buf[buf_index++];
7428 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7433 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7438 // start code prefix search
7439 for(; buf_index + 3 < buf_size; buf_index++){
7440 // This should always succeed in the first iteration.
7441 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7445 if(buf_index+3 >= buf_size) break;
7450 hx = h->thread_context[context_count];
/* Unescape the RBSP (remove emulation-prevention bytes). */
7452 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7453 if (ptr==NULL || dst_length < 0){
/* Strip trailing zero bytes before locating the rbsp_stop_bit. */
7456 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7458 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7460 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7461 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* AVC: warn if the NAL did not consume exactly its declared size;
 * only escalate to ERROR when the padding is non-zero. */
7464 if (h->is_avc && (nalsize != consumed)){
7465 int i, debug_level = AV_LOG_DEBUG;
7466 for (i = consumed; i < nalsize; i++)
7467 if (buf[buf_index+i])
7468 debug_level = AV_LOG_ERROR;
7469 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7473 buf_index += consumed;
/* Optionally drop non-reference NALs when hurrying/skipping. */
7475 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7476 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7481 switch(hx->nal_unit_type){
7483 if (h->nal_unit_type != NAL_IDR_SLICE) {
7484 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7487 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular slice: parse the header, then decode unless skipped. */
7489 init_get_bits(&hx->s.gb, ptr, bit_length);
7491 hx->inter_gb_ptr= &hx->s.gb;
7492 hx->s.data_partitioning = 0;
7494 if((err = decode_slice_header(hx, h)))
7497 s->current_picture_ptr->key_frame |=
7498 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7499 (h->sei_recovery_frame_cnt >= 0);
7500 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7501 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7502 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7503 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7504 && avctx->skip_frame < AVDISCARD_ALL){
/* VDPAU path: re-add the start code and raw slice bytes so the
 * hardware receives Annex-B framed data. */
7505 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7506 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7507 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7508 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* Data-partitioned slices (DPA/DPB/DPC) set up separate bit
 * readers for intra and inter partitions. */
7514 init_get_bits(&hx->s.gb, ptr, bit_length);
7516 hx->inter_gb_ptr= NULL;
7517 hx->s.data_partitioning = 1;
7519 err = decode_slice_header(hx, h);
7522 init_get_bits(&hx->intra_gb, ptr, bit_length);
7523 hx->intra_gb_ptr= &hx->intra_gb;
7526 init_get_bits(&hx->inter_gb, ptr, bit_length);
7527 hx->inter_gb_ptr= &hx->inter_gb;
7529 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7530 && s->context_initialized
7532 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7533 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7534 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7535 && avctx->skip_frame < AVDISCARD_ALL)
/* SEI message: parsed from its own bit reader (handler outside
 * this excerpt). */
7539 init_get_bits(&s->gb, ptr, bit_length);
7543 init_get_bits(&s->gb, ptr, bit_length);
7544 decode_seq_parameter_set(h);
7546 if(s->flags& CODEC_FLAG_LOW_DELAY)
7549 if(avctx->has_b_frames < 2)
7550 avctx->has_b_frames= !s->low_delay;
7553 init_get_bits(&s->gb, ptr, bit_length);
7555 decode_picture_parameter_set(h, bit_length);
/* NAL types that are recognized but intentionally ignored. */
7559 case NAL_END_SEQUENCE:
7560 case NAL_END_STREAM:
7561 case NAL_FILLER_DATA:
7563 case NAL_AUXILIARY_SLICE:
7566 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush queued slices once all thread contexts are occupied. */
7569 if(context_count == h->max_contexts) {
7570 execute_decode_slices(h, context_count);
7575 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7577 /* Slice could not be decoded in parallel mode, copy down
7578 * NAL unit stuff to context 0 and restart. Note that
7579 * rbsp_buffer is not transferred, but since we no longer
7580 * run in parallel mode this should not be an issue. */
7581 h->nal_unit_type = hx->nal_unit_type;
7582 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode any slices still pending after the NAL loop ends. */
7588 execute_decode_slices(h, context_count);
7593 * returns the number of bytes consumed for building the current frame
/* Clamp and sanitize the byte position reached while building the current
 * frame so the caller can report how much input was consumed.
 * NOTE(review): the final return statement falls outside this excerpt. */
7595 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7596 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7597 if(pos+10>buf_size) pos=buf_size; // oops ;)
/* Top-level decode entry point (AVCodec.decode callback).
 * Parses one packet's worth of NAL units, then handles delayed-picture
 * reordering and outputs at most one frame into *pict / *data_size.
 * A zero-sized buffer flushes the remaining delayed pictures (EOS).
 * NOTE(review): this function has many lines missing from the excerpt
 * (returns, brace closures, some declarations); comments below describe
 * only the visible logic. */
7602 static int decode_frame(AVCodecContext *avctx,
7603 void *data, int *data_size,
7604 const uint8_t *buf, int buf_size)
7606 H264Context *h = avctx->priv_data;
7607 MpegEncContext *s = &h->s;
7608 AVFrame *pict = data;
7611 s->flags= avctx->flags;
7612 s->flags2= avctx->flags2;
7614 /* end of stream, output what is still in the buffers */
7615 if (buf_size == 0) {
7619 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest POC for output. */
7620 out = h->delayed_pic[0];
7622 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7623 if(h->delayed_pic[i]->poc < out->poc){
7624 out = h->delayed_pic[i];
/* Compact the delayed-picture list after removing the output pic. */
7628 for(i=out_idx; h->delayed_pic[i]; i++)
7629 h->delayed_pic[i] = h->delayed_pic[i+1];
7632 *data_size = sizeof(AVFrame);
7633 *pict= *(AVFrame*)out;
/* First call with AVC (avcC) extradata: parse the embedded SPS/PPS. */
7639 if(h->is_avc && !h->got_avcC) {
7640 int i, cnt, nalsize;
7641 unsigned char *p = avctx->extradata;
7642 if(avctx->extradata_size < 7) {
7643 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7647 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7650 /* sps and pps in the avcC always have length coded with 2 bytes,
7651 so put a fake nal_length_size = 2 while parsing them */
7652 h->nal_length_size = 2;
7653 // Decode sps from avcC
7654 cnt = *(p+5) & 0x1f; // Number of sps
7656 for (i = 0; i < cnt; i++) {
7657 nalsize = AV_RB16(p) + 2;
7658 if(decode_nal_units(h, p, nalsize) < 0) {
7659 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7664 // Decode pps from avcC
7665 cnt = *(p++); // Number of pps
7666 for (i = 0; i < cnt; i++) {
7667 nalsize = AV_RB16(p) + 2;
7668 if(decode_nal_units(h, p, nalsize) != nalsize) {
7669 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7674 // Now store right nal length size, that will be use to parse all other nals
7675 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7676 // Do not reparse avcC
/* Annex-B extradata (non-avcC): parse it once as ordinary NAL units. */
7680 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7681 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7686 buf_index=decode_nal_units(h, buf, buf_size);
7690 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7691 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7692 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* A full picture is complete: finish it, update POC state and run the
 * B-frame reordering/output logic below. */
7696 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7697 Picture *out = s->current_picture_ptr;
7698 Picture *cur = s->current_picture_ptr;
7699 int i, pics, cross_idr, out_of_order, out_idx;
7703 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7704 s->current_picture_ptr->pict_type= s->pict_type;
7706 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7707 ff_vdpau_h264_set_reference_frames(s);
/* Apply memory-management control ops and roll POC state forward. */
7710 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7711 h->prev_poc_msb= h->poc_msb;
7712 h->prev_poc_lsb= h->poc_lsb;
7714 h->prev_frame_num_offset= h->frame_num_offset;
7715 h->prev_frame_num= h->frame_num;
7717 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7718 ff_vdpau_h264_picture_complete(s);
7721 * FIXME: Error handling code does not seem to support interlaced
7722 * when slices span multiple rows
7723 * The ff_er_add_slice calls don't work right for bottom
7724 * fields; they cause massive erroneous error concealing
7725 * Error marking covers both fields (top and bottom).
7726 * This causes a mismatched s->error_count
7727 * and a bad error table. Further, the error count goes to
7728 * INT_MAX when called for bottom field, because mb_y is
7729 * past end by one (callers fault) and resync_mb_y != 0
7730 * causes problems for the first MB line, too.
/* Reset per-picture SEI state for the next access unit. */
7736 h->sei_recovery_frame_cnt = -1;
7737 h->sei_dpb_output_delay = 0;
7738 h->sei_cpb_removal_delay = -1;
7739 h->sei_buffering_period_present = 0;
7741 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7742 /* Wait for second field. */
7746 cur->repeat_pict = 0;
7748 /* Signal interlacing information externally. */
7749 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7750 if(h->sps.pic_struct_present_flag){
7751 switch (h->sei_pic_struct)
7753 case SEI_PIC_STRUCT_FRAME:
7754 cur->interlaced_frame = 0;
7756 case SEI_PIC_STRUCT_TOP_FIELD:
7757 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7758 case SEI_PIC_STRUCT_TOP_BOTTOM:
7759 case SEI_PIC_STRUCT_BOTTOM_TOP:
7760 cur->interlaced_frame = 1;
7762 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7763 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7764 // Signal the possibility of telecined film externally (pic_struct 5,6)
7765 // From these hints, let the applications decide if they apply deinterlacing.
7766 cur->repeat_pict = 1;
7767 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7769 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7770 // Force progressive here, as doubling interlaced frame is a bad idea.
7771 cur->interlaced_frame = 0;
7772 cur->repeat_pict = 2;
7774 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7775 cur->interlaced_frame = 0;
7776 cur->repeat_pict = 4;
7780 /* Derive interlacing flag from used decoding process. */
7781 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7784 if (cur->field_poc[0] != cur->field_poc[1]){
7785 /* Derive top_field_first from field pocs. */
7786 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7788 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7789 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7790 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7791 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7792 cur->top_field_first = 1;
7794 cur->top_field_first = 0;
7796 /* Most likely progressive */
7797 cur->top_field_first = 0;
7801 //FIXME do something with unavailable reference frames
7803 /* Sort B-frames into display order */
7805 if(h->sps.bitstream_restriction_flag
7806 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7807 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Without restriction info, strict compliance forces the maximum
 * reordering delay. */
7811 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7812 && !h->sps.bitstream_restriction_flag){
7813 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7818 while(h->delayed_pic[pics]) pics++;
7820 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Queue the current picture; keep it referenced while delayed. */
7822 h->delayed_pic[pics++] = cur;
7823 if(cur->reference == 0)
7824 cur->reference = DELAYED_PIC_REF;
7826 out = h->delayed_pic[0];
7828 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7829 if(h->delayed_pic[i]->poc < out->poc){
7830 out = h->delayed_pic[i];
7833 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7835 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames adaptively when output would be out of order. */
7837 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7839 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7841 ((!cross_idr && out->poc > h->outputed_poc + 2)
7842 || cur->pict_type == FF_B_TYPE)))
7845 s->avctx->has_b_frames++;
7848 if(out_of_order || pics > s->avctx->has_b_frames){
7849 out->reference &= ~DELAYED_PIC_REF;
7850 for(i=out_idx; h->delayed_pic[i]; i++)
7851 h->delayed_pic[i] = h->delayed_pic[i+1];
7853 if(!out_of_order && pics > s->avctx->has_b_frames){
7854 *data_size = sizeof(AVFrame);
7856 h->outputed_poc = out->poc;
7857 *pict= *(AVFrame*)out;
7859 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7864 assert(pict->data[0] || !*data_size);
7865 ff_print_debug_info(s, pict);
7866 //printf("out %d\n", (int)pict->data[0]);
7869 /* Return the Picture timestamp as the frame number */
7870 /* we subtract 1 because it is added on utils.c */
7871 avctx->frame_number = s->picture_number - 1;
7873 return get_consumed_bytes(s, buf_index, buf_size);
/* Fill h->mb_avail[] with neighbor-availability flags for the current
 * macroblock: a neighbor counts as available only if it belongs to the
 * same slice (slice_table comparison).  Indices 0..2 are the above-left,
 * above and above-right MBs; 3 is the left MB.  Entries 4 and 5 are
 * constants (see FIXMEs).
 * NOTE(review): lines guarding the "row above exists" case fall outside
 * this excerpt. */
7876 static inline void fill_mb_avail(H264Context *h){
7877 MpegEncContext * const s = &h->s;
7878 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7881 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7882 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7883 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7889 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7890 h->mb_avail[4]= 1; //FIXME move out
7891 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): the lines below appear to be the interior of the
 * #ifdef TEST self-test main() (its header and several control lines are
 * missing from this excerpt).  It round-trips Exp-Golomb coding, the 4x4
 * (I)DCT, the quantizer and the NAL escape/unescape layer. */
#define SIZE (COUNT*40)
// int int_temp[10000];
7907 AVCodecContext avctx;
7909 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb write/read round trip --- */
7911 init_put_bits(&pb, temp, SIZE);
7912 printf("testing unsigned exp golomb\n");
7913 for(i=0; i<COUNT; i++){
7915 set_ue_golomb(&pb, i);
7916 STOP_TIMER("set_ue_golomb");
7918 flush_put_bits(&pb);
7920 init_get_bits(&gb, temp, 8*SIZE);
7921 for(i=0; i<COUNT; i++){
7924 s= show_bits(&gb, 24);
7927 j= get_ue_golomb(&gb);
7929 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7932 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb round trip (values centered on zero) --- */
7936 init_put_bits(&pb, temp, SIZE);
7937 printf("testing signed exp golomb\n");
7938 for(i=0; i<COUNT; i++){
7940 set_se_golomb(&pb, i - COUNT/2);
7941 STOP_TIMER("set_se_golomb");
7943 flush_put_bits(&pb);
7945 init_get_bits(&gb, temp, 8*SIZE);
7946 for(i=0; i<COUNT; i++){
7949 s= show_bits(&gb, 24);
7952 j= get_se_golomb(&gb);
7953 if(j != i - COUNT/2){
7954 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7957 STOP_TIMER("get_se_golomb");
/* --- forward DCT + dequant + IDCT error measurement --- */
7961 printf("testing 4x4 (I)DCT\n");
7964 uint8_t src[16], ref[16];
7965 uint64_t error= 0, max_error=0;
7967 for(i=0; i<COUNT; i++){
7969 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7970 for(j=0; j<16; j++){
7971 ref[j]= random()%255;
7972 src[j]= random()%255;
7975 h264_diff_dct_c(block, src, ref, 4);
/* Approximate dequantization of the difference coefficients. */
7978 for(j=0; j<16; j++){
7979 // printf("%d ", block[j]);
7980 block[j]= block[j]*4;
7981 if(j&1) block[j]= (block[j]*4 + 2)/5;
7982 if(j&4) block[j]= (block[j]*4 + 2)/5;
7986 s->dsp.h264_idct_add(ref, block, 4);
7987 /* for(j=0; j<16; j++){
7988 printf("%d ", ref[j]);
7992 for(j=0; j<16; j++){
7993 int diff= FFABS(src[j] - ref[j]);
7996 max_error= FFMAX(max_error, diff);
7999 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8000 printf("testing quantizer\n");
8001 for(qp=0; qp<52; qp++){
8003 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL escape/unescape round trip on random bitstreams --- */
8006 printf("Testing NAL layer\n");
8008 uint8_t bitstream[COUNT];
8009 uint8_t nal[COUNT*2];
8011 memset(&h, 0, sizeof(H264Context));
8013 for(i=0; i<COUNT; i++){
/* Non-zero payload, then punch in a controlled number of zeros. */
8021 for(j=0; j<COUNT; j++){
8022 bitstream[j]= (random() % 255) + 1;
8025 for(j=0; j<zeros; j++){
8026 int pos= random() % COUNT;
8027 while(bitstream[pos] == 0){
8036 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8038 printf("encoding failed\n");
8042 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8046 if(out_length != COUNT){
8047 printf("incorrect length %d %d\n", out_length, COUNT);
8051 if(consumed != nal_length){
8052 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8056 if(memcmp(bitstream, out, COUNT)){
8057 printf("mismatch\n");
8063 printf("Testing RBSP\n");
/* Codec close callback: release RBSP scratch buffers, per-decoder tables
 * and all stored SPS/PPS parameter sets.
 * NOTE(review): the final return and MpegEncContext teardown fall outside
 * this excerpt. */
8071 static av_cold int decode_end(AVCodecContext *avctx)
8073 H264Context *h = avctx->priv_data;
8074 MpegEncContext *s = &h->s;
8077 av_freep(&h->rbsp_buffer[0]);
8078 av_freep(&h->rbsp_buffer[1]);
8079 free_tables(h); //FIXME cleanup init stuff perhaps
/* av_freep NULLs each slot, so the buffers cannot be double-freed. */
8081 for(i = 0; i < MAX_SPS_COUNT; i++)
8082 av_freep(h->sps_buffers + i);
8084 for(i = 0; i < MAX_PPS_COUNT; i++)
8085 av_freep(h->pps_buffers + i);
8089 // memset(h, 0, sizeof(H264Context));
/* Codec registration entries.  NOTE(review): most positional initializer
 * fields (name, type, id, init/close/decode callbacks) are missing from
 * this excerpt; only the capabilities and long_name fields are visible. */
AVCodec h264_decoder = {
8099 sizeof(H264Context),
/* DRAW_HORIZ_BAND is disabled; DR1 allows direct rendering, DELAY marks
 * that output frames lag input (B-frame reordering). */
8104 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8106 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU hardware-accelerated variant of the same decoder. */
AVCodec h264_vdpau_decoder = {
8114 sizeof(H264Context),
8119 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8121 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
#if CONFIG_SVQ3_DECODER