2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
43 * Value of Picture.reference when Picture is not a reference picture, but
44 * is held for delayed output.
46 #define DELAYED_PIC_REF 4
// File-local VLC tables. Their initialisation and use are not visible in this
// section; judging by the names they presumably serve coefficient-token,
// total-zeros and run decoding -- TODO confirm against the code that fills them.
48 static VLC coeff_token_vlc[4];
49 static VLC chroma_dc_coeff_token_vlc;
51 static VLC total_zeros_vlc[15];
52 static VLC chroma_dc_total_zeros_vlc[3];
54 static VLC run_vlc[6];
// Forward declarations of static functions whose definitions are not in the
// visible section.
57 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
58 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
59 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
60 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
// Packs two 16-bit values into one 32-bit word. Which argument lands in the
// upper half depends on WORDS_BIGENDIAN: a when it is defined, b otherwise.
// Only the operand placed in the low half is masked with 0xFFFF; the other one
// is shifted left by 16 unmasked.
62 static av_always_inline uint32_t pack16to32(int a, int b){
63 #ifdef WORDS_BIGENDIAN
64 return (b&0xFFFF) + (a<<16);
// Variant for the non-big-endian case (the preprocessor line separating the two
// returns is not visible in this listing).
66 return (a&0xFFFF) + (b<<16);
// Lookup table with 52 entries: entry i holds i % 6 (the values cycle 0..5).
70 const uint8_t ff_rem6[52]={
71 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
// Lookup table with 52 entries: entry i holds i / 6 (integer division, 0..8).
74 const uint8_t ff_div6[52]={
75 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
// Loads data of the macroblocks neighbouring the current one (top, left,
// topleft, topright) into the per-macroblock caches of H264Context:
// sample-availability masks, intra4x4_pred_mode_cache, non_zero_count_cache,
// top_cbp/left_cbp, mv_cache, ref_cache, mvd_cache and direct_cache. It also
// stores the resolved neighbour indices in h->top_mb_xy / h->left_mb_xy and
// ends by setting h->neighbor_transform_size to the number of top/left[0]
// neighbours that use the 8x8 transform.
//   mb_type      type flags of the current macroblock
//   for_deblock  nonzero on the deblocking path; it appears in the two
//                conditions below that test it
// NOTE(review): this listing omits a number of original lines (several
// if/else headers, closing braces, loop headers), so the comments below
// describe only the statements that are visible.
79 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
80 MpegEncContext * const s = &h->s;
81 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
82 int topleft_xy, top_xy, topright_xy, left_xy[2];
83 int topleft_type, top_type, topright_type, left_type[2];
85 int topleft_partition= -1;
// Default top neighbour: one macroblock row up; the row step is shifted left
// by FIELD_PICTURE.
88 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
90 //FIXME deblocking could skip the intra and nnz parts.
// Deblocking shortcut condition; its body is not visible here (presumably an
// early return -- TODO confirm).
91 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
94 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
// Default neighbour positions relative to top_xy / mb_xy.
96 topleft_xy = top_xy - 1;
97 topright_xy= top_xy + 1;
98 left_xy[1] = left_xy[0] = mb_xy-1;
// Macroblock-pair handling: pair_xy indexes the even row of the current pair
// (mb_y & ~1); the *_frame_flag values are 1 when the respective pair is not
// interlaced. The guarding condition is not visible (presumably FRAME_MBAFF).
108 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
109 const int top_pair_xy = pair_xy - s->mb_stride;
110 const int topleft_pair_xy = top_pair_xy - 1;
111 const int topright_pair_xy = top_pair_xy + 1;
112 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
113 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
114 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
115 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
116 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
117 const int bottom = (s->mb_y & 1);
118 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
// Each of the three ternaries below decides whether the top / topleft /
// topright index moves up one more row; the opening "if (bottom" parts are
// not visible in this listing.
120 ? !curr_mb_frame_flag // bottom macroblock
121 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
123 top_xy -= s->mb_stride;
126 ? !curr_mb_frame_flag // bottom macroblock
127 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
129 topleft_xy -= s->mb_stride;
130 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
131 topleft_xy += s->mb_stride;
132 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
133 topleft_partition = 0;
136 ? !curr_mb_frame_flag // bottom macroblock
137 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
139 topright_xy -= s->mb_stride;
// Left neighbour differs in frame/field mode from the current MB: both left
// indices restart from the top MB of the left pair.
141 if (left_mb_frame_flag != curr_mb_frame_flag) {
142 left_xy[1] = left_xy[0] = pair_xy - 1;
143 if (curr_mb_frame_flag) {
164 left_xy[1] += s->mb_stride;
// Publish the resolved top/left neighbour indices in the context.
177 h->top_mb_xy = top_xy;
178 h->left_mb_xy[0] = left_xy[0];
179 h->left_mb_xy[1] = left_xy[1];
// Neighbour types, first variant: a neighbour is used when its slice_table
// entry is below 255, otherwise its type is 0. The selecting condition is not
// visible (presumably the for_deblock path -- TODO confirm).
183 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
184 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
185 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// Non-intra MB with FRAME_MBAFF: rebuild the current MB's own cache entries
// from the stored tables. v is the 16-bit flag word that
// write_back_non_zero_count() stores at non_zero_count[mb_xy][14]; bit i is
// nonzero-ness of block i.
187 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
189 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
191 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
192 for(list=0; list<h->list_count; list++){
193 if(USES_LIST(mb_type,list)){
194 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
195 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
196 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
197 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
// Replicate a pair of 8x8 reference indices over two cache rows each
// (pack16to32(...)*0x0101 duplicates each byte into the neighbouring byte).
203 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
204 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
206 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
207 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
// Otherwise: zero the 4x4 MV area and mark its refs LIST_NOT_USED.
209 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
210 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
// Neighbour types, second variant: a neighbour is used only when it belongs
// to the current slice (slice_table entry equals slice_num), otherwise 0.
215 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
216 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
217 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
218 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
219 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra MB: set the initial sample-availability bitmasks, then clear bits for
// every neighbour that is non-intra and either missing (type 0) or excluded
// by constrained_intra_pred.
222 if(IS_INTRA(mb_type)){
223 h->topleft_samples_available=
224 h->top_samples_available=
225 h->left_samples_available= 0xFFFF;
226 h->topright_samples_available= 0xEEEA;
228 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
229 h->topleft_samples_available= 0xB3FF;
230 h->top_samples_available= 0x33FF;
231 h->topright_samples_available= 0x26EA;
234 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
244 h->topright_samples_available&= 0xFBFF;
// Intra4x4 MB: load the neighbours' prediction modes into the cache row above
// and the cache column left of the current MB. Entries 4,5,6,3 of the top
// neighbour are the ones write_back_intra_pred_mode() stores from the bottom
// cache row.
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
// Top neighbour is not intra4x4: a substitute value pred is chosen depending
// on this condition (the assignments to pred are not visible here).
254 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
// Same for the two left neighbours; left_block selects which stored entries
// of the left MB are read.
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
270 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
291 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Non-zero-count cache, top neighbour: copy its stored entries 4,5,6,3 and
// the pairs 9,8 and 12,11 (the condition selecting this branch is not
// visible).
293 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
294 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
295 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
296 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
298 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
299 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
301 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
302 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
// Fallback for the same slots: 0 when CABAC is on and the current MB is not
// intra, otherwise 64.
305 h->non_zero_count_cache[4+8*0]=
306 h->non_zero_count_cache[5+8*0]=
307 h->non_zero_count_cache[6+8*0]=
308 h->non_zero_count_cache[7+8*0]=
310 h->non_zero_count_cache[1+8*0]=
311 h->non_zero_count_cache[2+8*0]=
313 h->non_zero_count_cache[1+8*3]=
314 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Non-zero-count cache, left neighbours: same copy-or-fallback scheme, with
// the source entries selected through left_block.
318 for (i=0; i<2; i++) {
320 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
321 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
322 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
323 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 h->non_zero_count_cache[3+8*1 + 2*8*i]=
326 h->non_zero_count_cache[3+8*2 + 2*8*i]=
327 h->non_zero_count_cache[0+8*1 + 8*i]=
328 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// Coded-block-pattern of the neighbours: top_cbp comes straight from
// cbp_table; left_cbp keeps the bits under mask 0x1f0 of the first left MB
// and then receives bit 1 and bit 3 from the left MBs' cbp, selected through
// left_block. The values used in the intra fallbacks are not visible here.
335 h->top_cbp = h->cbp_table[top_xy];
336 } else if(IS_INTRA(mb_type)) {
343 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
344 } else if(IS_INTRA(mb_type)) {
350 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// Inter or direct MB: load neighbouring motion vectors and reference indices
// for every list.
358 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 for(list=0; list<h->list_count; list++){
361 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
// NOTE(review): the end marker of the block comment opened on the next line
// is not visible in this listing.
362 /*if(!h->mv_cache_clean[list]){
363 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
364 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
365 h->mv_cache_clean[list]= 1;
369 h->mv_cache_clean[list]= 0;
// Top neighbour uses this list: copy the four MVs of its last 4x4 row
// (b_xy is offset by 3*b_stride) and its two lower 8x8 reference indices,
// each reference index covering two cache slots.
371 if(USES_LIST(top_type, list)){
372 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
373 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
374 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
378 h->ref_cache[list][scan8[0] + 0 - 1*8]=
379 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
380 h->ref_cache[list][scan8[0] + 2 - 1*8]=
381 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
// Otherwise: zero MVs; the four refs become LIST_NOT_USED when a top MB
// exists (top_type nonzero) and PART_NOT_AVAILABLE when it does not.
383 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
387 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
// Left neighbours: two cache rows per left MB, read from the left MB's last
// column (offset +3 in 4x4 units, +1 in 8x8 units), rows chosen by left_block.
391 int cache_idx = scan8[0] - 1 + i*2*8;
392 if(USES_LIST(left_type[i], list)){
393 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
394 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
395 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
396 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
397 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
398 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 *(uint32_t*)h->mv_cache [list][cache_idx ]=
401 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
402 h->ref_cache[list][cache_idx ]=
403 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Condition whose body is not visible here (presumably skips the remaining
// neighbours for this list -- TODO confirm).
407 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
// Topleft neighbour: a single MV and ref. topleft_partition is -1 by default,
// so the masks add the extra row offsets; when it was set to 0 above, they
// add nothing.
410 if(USES_LIST(topleft_type, list)){
411 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
412 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
413 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
414 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
417 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Topright neighbour: the first MV of its last 4x4 row and the matching ref.
420 if(USES_LIST(topright_type, list)){
421 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
422 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
423 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
424 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
427 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
// Condition whose body is not visible here.
430 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
// Mark the cache slots scan8[5]+1, scan8[7]+1, scan8[13]+1, scan8[4] and
// scan8[12] as PART_NOT_AVAILABLE with zero MVs.
433 h->ref_cache[list][scan8[5 ]+1] =
434 h->ref_cache[list][scan8[7 ]+1] =
435 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
436 h->ref_cache[list][scan8[4 ]] =
437 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
438 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
439 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
441 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
442 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 /* XXX beurk, Load mvd */
// Motion vector differences: same top/left loading as for the MVs, reading
// mvd_table into mvd_cache, with zero when the neighbour does not use the
// list. The condition enabling this part is not visible here.
446 if(USES_LIST(top_type, list)){
447 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
448 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
458 if(USES_LIST(left_type[0], list)){
459 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
460 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
466 if(USES_LIST(left_type[1], list)){
467 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
474 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
475 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
477 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
478 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: direct_cache flags. The current MB's 4x4 area is cleared; a
// top/left entry becomes 1 for a direct neighbour, is read from direct_table
// for an 8x8-partitioned neighbour, and is 0 otherwise.
480 if(h->slice_type == B_TYPE){
481 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
483 if(IS_DIRECT(top_type)){
484 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
485 }else if(IS_8X8(top_type)){
486 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
487 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
488 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
493 if(IS_DIRECT(left_type[0]))
494 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
495 else if(IS_8X8(left_type[0]))
496 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
500 if(IS_DIRECT(left_type[1]))
501 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
502 else if(IS_8X8(left_type[1]))
503 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// List of the ten neighbour cache slots (topleft, four top, topright, four
// left) together with the MB type each one came from; MAP_F2F is applied to
// every entry. The macro header these continuation lines belong to is not
// visible in this listing.
511 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
512 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
513 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
517 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
518 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
520 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
// Two MAP_F2F definitions follow. The first acts on non-interlaced neighbours
// with a valid ref: the ref index is doubled and the vertical MV / MVD
// components are halved. The second acts on interlaced neighbours: the ref
// index is halved and the vertical components are doubled. The conditions
// choosing between them are not visible here (presumably the current MB's
// own frame/field mode).
522 #define MAP_F2F(idx, mb_type)\
523 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
524 h->ref_cache[list][idx] <<= 1;\
525 h->mv_cache[list][idx][1] /= 2;\
526 h->mvd_cache[list][idx][1] /= 2;\
531 #define MAP_F2F(idx, mb_type)\
532 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
533 h->ref_cache[list][idx] >>= 1;\
534 h->mv_cache[list][idx][1] <<= 1;\
535 h->mvd_cache[list][idx][1] <<= 1;\
545 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
// Saves seven intra4x4 prediction modes of the current macroblock from the
// cache into h->intra4x4_pred_mode[mb_xy]: entries 0..3 come from cache
// column 7 (cache rows 1..4), entries 4..6 from cache row 4 (columns 4..6).
// fill_caches() reads these stored entries when this macroblock later acts as
// a top or left neighbour.
548 static inline void write_back_intra_pred_mode(H264Context *h){
549 MpegEncContext * const s = &h->s;
550 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * Checks whether the top and left blocks are available where they are needed, and changes the DC mode so that it only uses the available blocks.
// Validates the cached intra4x4 prediction modes against neighbour
// availability. The top[] / left[] tables are indexed by the requested mode
// and yield a status value that is logged as an error and/or written back as
// the replacement mode; the tests on status that choose between these
// outcomes, and the return statements, are not visible in this listing.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
// Top samples unavailable (bit 0x8000 clear): check the modes in the cache row
// starting at scan8[0].
570 if(!(h->top_samples_available&0x8000)){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
// Left samples unavailable: check the modes in the cache column starting at
// scan8[0] (step 8 per row).
582 if(!(h->left_samples_available&0x8000)){
584 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
595 } //FIXME cleanup like next
598 * Checks whether the top and left blocks are available where they are needed, and changes the DC mode so that it only uses the available blocks.
// Validates a single intra prediction mode (the first error message calls it
// the chroma pred mode) against neighbour availability. Three error cases are
// logged: mode out of range, top block unavailable, left block unavailable.
// The top[] / left[] tables hold per-mode replacement values; the statements
// that apply them and the return values are not visible in this listing.
//   mode  requested prediction mode
600 static inline int check_intra_pred_mode(H264Context *h, int mode){
601 MpegEncContext * const s = &h->s;
602 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
603 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
606 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 if(!(h->top_samples_available&0x8000)){
613 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
618 if(!(h->left_samples_available&0x8000)){
621 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
630 * gets the predicted intra4x4 prediction mode.
// Predicts the intra4x4 mode of block n from the cached modes of its left and
// top neighbours: the smaller of the two is taken, and DC_PRED is returned
// when that minimum is negative. The final return is not visible in this
// listing (presumably it returns min -- TODO confirm).
//   n  block index, translated to a cache position through scan8[]
632 static inline int pred_intra_mode(H264Context *h, int n){
633 const int index8= scan8[n];
634 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
635 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
636 const int min= FFMIN(left, top);
638 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
640 if(min<0) return DC_PRED;
// Saves the non-zero coefficient counts that later macroblocks need as
// neighbour data from the cache into h->non_zero_count[mb_xy].
644 static inline void write_back_non_zero_count(H264Context *h){
645 MpegEncContext * const s = &h->s;
646 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Entries 0..3: cache column 7, rows 1..4; entries 4..6: cache row 4,
// columns 4..6.
648 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
649 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
650 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
651 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
652 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
653 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
654 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
// Entries 7..9 and 10..12: two further groups of three, taken from cache
// columns 1..2 (presumably the two chroma planes -- TODO confirm).
656 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
657 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
658 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
660 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
661 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
662 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
665 // store all luma nnzs, for deblocking
// Bit i of v is set when block i has a nonzero count; the word is stored as a
// uint16_t at entry 14 and read back by fill_caches(). The loop header and
// the declaration of v are not visible in this listing.
668 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
669 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
674 * gets the predicted number of non zero coefficients.
675 * @param n block index
// Predicts the number of non-zero coefficients of block n from the cached
// counts of its left and top neighbours. The line that first computes i from
// left and top is not visible here; what is visible is that values below 64
// are replaced by (i+1)>>1, and that the trace prints i&31 (the return
// statement is not visible either).
677 static inline int pred_non_zero_count(H264Context *h, int n){
678 const int index8= scan8[n];
679 const int left= h->non_zero_count_cache[index8 - 1];
680 const int top = h->non_zero_count_cache[index8 - 8];
683 if(i<64) i= (i+1)>>1;
685 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
// Fetches the diagonal neighbour motion vector for the partition at cache
// index i: the top-right one when it is available, otherwise the top-left one.
//   C           receives a pointer to the selected motion vector
//   i           cache index of the partition
//   list        reference list
//   part_width  partition width in cache columns (top-right is read at
//               i - 8 + part_width)
// Returns the reference index belonging to the selected vector; the special
// cases below can instead return LIST_NOT_USED or a rescaled reference index.
// NOTE(review): several condition lines are not visible in this listing.
690 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
691 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
692 MpegEncContext *s = &h->s;
694 /* there is no consistent mapping of mvs to neighboring locations that will
695 * make mbaff happy, so we can't move all this logic to fill_caches */
697 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
// The cache slot scan8[0]-2 serves as scratch storage for a converted vector;
// it is zeroed and *C points at it for the special cases below.
699 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
700 *C = h->mv_cache[list][scan8[0]-2];
703 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
704 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
705 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV reads the vector at 4x4 position (X4, Y4) straight from the
// picture: returns LIST_NOT_USED when the owning MB does not use this list,
// otherwise stores the vector in the scratch slot with MV_OP applied to its
// vertical component and returns the reference index with REF_OP applied.
706 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
707 const int x4 = X4, y4 = Y4;\
708 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
709 if(!USES_LIST(mb_type,list))\
710 return LIST_NOT_USED;\
711 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
712 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
713 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
714 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
716 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// Top-right unavailable and the partition sits in cache column 4 with a left
// neighbour present: take the vector from the left macroblock, rescaled
// according to whether that macroblock is interlaced.
719 if(topright_ref == PART_NOT_AVAILABLE
720 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
721 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
723 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
724 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
727 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
729 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
730 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
// Regular case: top-right from the cache when available, else top-left.
736 if(topright_ref != PART_NOT_AVAILABLE){
737 *C= h->mv_cache[list][ i - 8 + part_width ];
740 tprintf(s->avctx, "topright MV not available\n");
742 *C= h->mv_cache[list][ i - 8 - 1 ];
743 return h->ref_cache[list][ i - 8 - 1 ];
748 * gets the predicted MV.
749 * @param n the block index
750 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
751 * @param mx the x component of the predicted motion vector
752 * @param my the y component of the predicted motion vector
// Predicts the motion vector of block n from its left (A), top (B) and
// diagonal (C, via fetch_diagonal_mv) neighbours and writes it to *mx / *my.
// match_count counts the neighbours whose reference index equals ref:
//   more than one  -> component-wise median of A, B, C
//   exactly one    -> handled on lines partly missing from this listing
//                     (presumably the matching neighbour's vector is used)
//   none           -> a special case when only the left neighbour is
//                     available (body not visible), otherwise the median
//   part_width  partition width; the assert allows 1, 2 or 4
754 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
755 const int index8= scan8[n];
756 const int top_ref= h->ref_cache[list][ index8 - 8 ];
757 const int left_ref= h->ref_cache[list][ index8 - 1 ];
758 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
759 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
761 int diagonal_ref, match_count;
763 assert(part_width==1 || part_width==2 || part_width==4);
773 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
774 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
775 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
776 if(match_count > 1){ //most common
777 *mx= mid_pred(A[0], B[0], C[0]);
778 *my= mid_pred(A[1], B[1], C[1]);
779 }else if(match_count==1){
783 }else if(top_ref==ref){
791 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= mid_pred(A[0], B[0], C[0]);
796 *my= mid_pred(A[1], B[1], C[1]);
800 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
804 * gets the directionally predicted 16x8 MV.
805 * @param n the block index
806 * @param mx the x component of the predicted motion vector
807 * @param my the y component of the predicted motion vector
// Directional motion vector prediction for a 16x8 partition. Two candidates
// are visible: the top neighbour of block 0 (B) and the left neighbour of
// block 8 (A). The tests that pick a candidate and copy it to *mx / *my are
// not visible in this listing (presumably B serves the upper and A the lower
// partition when the reference index matches). The fallback is the generic
// pred_motion() with part_width 4.
809 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
811 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
812 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
814 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
822 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
823 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
825 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
835 pred_motion(h, n, 4, list, ref, mx, my);
839 * gets the directionally predicted 8x16 MV.
840 * @param n the block index
841 * @param mx the x component of the predicted motion vector
842 * @param my the y component of the predicted motion vector
// Directional motion vector prediction for an 8x16 partition. Two candidates
// are visible: the left neighbour of block 0 (A) and the diagonal neighbour
// of block 4 obtained through fetch_diagonal_mv() with width 2 (C); C is used
// when diagonal_ref equals ref. The remaining tests and the assignments to
// *mx / *my are not visible in this listing. The fallback is the generic
// pred_motion() with part_width 2.
844 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
846 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
847 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
849 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
860 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
862 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
864 if(diagonal_ref == ref){
872 pred_motion(h, n, 2, list, ref, mx, my);
// Motion vector prediction for a skipped P macroblock (list 0, reference 0).
// A special case applies when the top or left neighbour is unavailable, or
// when either of them has reference index 0 together with an all-zero motion
// vector; its body is not visible in this listing (presumably it yields a
// zero vector and returns -- TODO confirm). Otherwise the vector comes from
// pred_motion() for the whole 16x16 block.
875 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
876 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
877 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
879 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
881 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
882 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
883 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
889 pred_motion(h, 0, 4, 0, 0, mx, my);
// Computes h->dist_scale_factor[i] for every list-0 reference from picture
// order counts: poc is the current picture, poc1 the first list-1 reference,
// poc0 the i-th list-0 reference.
894 static inline void direct_dist_scale_factor(H264Context * const h){
895 const int poc = h->s.current_picture_ptr->poc;
896 const int poc1 = h->ref_list[1][0].poc;
898 for(i=0; i<h->ref_count[0]; i++){
899 int poc0 = h->ref_list[0][i].poc;
// td: POC distance between the two references, clipped to [-128, 127].
900 int td = av_clip(poc1 - poc0, -128, 127);
// A zero distance gets the fixed factor 256, which also avoids dividing by td.
901 if(td == 0 /* FIXME || pic0 is a long-term ref */){
902 h->dist_scale_factor[i] = 256;
// tb: clipped POC distance from the list-0 reference to the current picture;
// tx approximates 16384/td with rounding; result is clipped to [-1024, 1023].
904 int tb = av_clip(poc - poc0, -128, 127);
905 int tx = (16384 + (FFABS(td) >> 1)) / td;
906 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
// Field variant: every factor is duplicated into entries 2*i and 2*i+1 (the
// condition guarding this loop is not visible in this listing).
910 for(i=0; i<h->ref_count[0]; i++){
911 h->dist_scale_factor_field[2*i] =
912 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
// Records the current reference lists on the current picture (ref_count and
// ref_poc) and builds h->map_col_to_list0, which maps reference indices of
// the first list-1 picture (ref1) to indices in the current lists by matching
// picture order counts.
916 static inline void direct_ref_list_init(H264Context * const h){
917 MpegEncContext * const s = &h->s;
918 Picture * const ref1 = &h->ref_list[1][0];
919 Picture * const cur = s->current_picture_ptr;
921 if(cur->pict_type == I_TYPE)
922 cur->ref_count[0] = 0;
923 if(cur->pict_type != B_TYPE)
924 cur->ref_count[1] = 0;
// NOTE(review): as visible here, the loop below assigns cur->ref_count[list]
// for both lists unconditionally, so the zeroing above has no lasting effect
// unless h->ref_count[] is already 0 for those picture types -- confirm intent.
925 for(list=0; list<2; list++){
926 cur->ref_count[list] = h->ref_count[list];
927 for(j=0; j<h->ref_count[list]; j++)
928 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// The mapping is only needed for B pictures without spatial direct
// prediction; the statement guarded by this condition is not visible here
// (presumably a return).
930 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
// For every reference of ref1, search the current list for a picture with
// the same POC; the entry stays 0 when none is found.
932 for(list=0; list<2; list++){
933 for(i=0; i<ref1->ref_count[list]; i++){
934 const int poc = ref1->ref_poc[list][i];
935 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
936 for(j=0; j<h->ref_count[list]; j++)
937 if(h->ref_list[list][j].poc == poc){
938 h->map_col_to_list0[list][i] = j;
// Field variant of the map: frame index j expands to field indices 2*j and
// 2*j+1 (the condition guarding this part is not visible in this listing).
944 for(list=0; list<2; list++){
945 for(i=0; i<ref1->ref_count[list]; i++){
946 j = h->map_col_to_list0[list][i];
947 h->map_col_to_list0_field[list][2*i] = 2*j;
948 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Fills h->mv_cache and h->ref_cache for a direct-predicted macroblock.
 *
 * Co-located data (mb_type, motion_val, ref_index) is read from
 * h->ref_list[1][0]. Depending on h->direct_spatial_mv_pred the vectors are
 * derived either spatially (reference indices taken from neighbouring
 * ref_cache entries) or temporally (co-located vectors scaled with
 * dist_scale_factor).
 *
 * When *mb_type is already 8x8 on entry (is_b8x8), each 8x8 sub-block is
 * tested with IS_DIRECT(h->sub_mb_type[i8]) before being processed.
 *
 * @param h       decoder context
 * @param mb_type in/out macroblock type; partition size, list usage and the
 *                direct/interlaced flags are rewritten here
 */
954 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
955 MpegEncContext * const s = &h->s;
956 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
957 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
958 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
959 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
960 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
961 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
962 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
963 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
964 const int is_b8x8 = IS_8X8(*mb_type);
965 unsigned int sub_mb_type;
968 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose the partitioning of the direct block from the co-located macroblock's type */
969 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
970 /* FIXME save sub mb types from previous frames (or derive from MVs)
971 * so we know exactly what block size to use */
972 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
973 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
974 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
975 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
976 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
978 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
979 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
982 *mb_type |= MB_TYPE_DIRECT2;
984 *mb_type |= MB_TYPE_INTERLACED;
986 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
988 if(h->direct_spatial_mv_pred){
993 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
995 /* ref = min(neighbors) */
996 for(list=0; list<2; list++){
/* candidate reference indices: left (-1), top (-8) and top-right (-8+4) cache entries */
997 int refa = h->ref_cache[list][scan8[0] - 1];
998 int refb = h->ref_cache[list][scan8[0] - 8];
999 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* alternative candidate: top-left (-8-1) cache entry */
1001 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1003 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1005 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
/* neither list has a usable neighbour reference: use ref 0 and zero vectors for both lists */
1011 if(ref[0] < 0 && ref[1] < 0){
1012 ref[0] = ref[1] = 0;
1013 mv[0][0] = mv[0][1] =
1014 mv[1][0] = mv[1][1] = 0;
1016 for(list=0; list<2; list++){
1018 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1020 mv[list][0] = mv[list][1] = 0;
/* drop the list flag of a list that is not used */
1026 *mb_type &= ~MB_TYPE_L1;
1027 sub_mb_type &= ~MB_TYPE_L1;
1028 }else if(ref[0] < 0){
1030 *mb_type &= ~MB_TYPE_L0;
1031 sub_mb_type &= ~MB_TYPE_L0;
/* current and co-located macroblocks differ in field/frame coding:
 * look the co-located data up relative to the macroblock pair (pair_xy) */
1034 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1035 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1036 int mb_types_col[2];
1037 int b8_stride = h->b8_stride;
1038 int b4_stride = h->b_stride;
1040 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1042 if(IS_INTERLACED(*mb_type)){
1043 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1044 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1046 l1ref0 -= 2*b8_stride;
1047 l1ref1 -= 2*b8_stride;
1048 l1mv0 -= 4*b4_stride;
1049 l1mv1 -= 4*b4_stride;
/* col_parity is 1 when field_poc[1] is at least as close to the current POC as field_poc[0] */
1054 int cur_poc = s->current_picture_ptr->poc;
1055 int *col_poc = h->ref_list[1]->field_poc;
1056 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1057 int dy = 2*col_parity - (s->mb_y&1);
1059 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1060 l1ref0 += dy*b8_stride;
1061 l1ref1 += dy*b8_stride;
1062 l1mv0 += 2*dy*b4_stride;
1063 l1mv1 += 2*dy*b4_stride;
1067 for(i8=0; i8<4; i8++){
1070 int xy8 = x8+y8*b8_stride;
1071 int xy4 = 3*x8+y8*b4_stride;
1074 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1076 h->sub_mb_type[i8] = sub_mb_type;
1078 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1079 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* co-located block is inter, uses reference 0 and has a vector within +-1 in both components */
1080 if(!IS_INTRA(mb_types_col[y8])
1081 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1082 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1084 a= pack16to32(mv[0][0],mv[0][1]);
1086 b= pack16to32(mv[1][0],mv[1][1]);
1088 a= pack16to32(mv[0][0],mv[0][1]);
1089 b= pack16to32(mv[1][0],mv[1][1]);
1091 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1092 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1094 }else if(IS_16X16(*mb_type)){
1097 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1098 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
/* same co-located test as above for the whole macroblock; the list-1 fallback
 * additionally depends on h->x264_build (either > 33 or 0) */
1099 if(!IS_INTRA(mb_type_col)
1100 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1101 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1102 && (h->x264_build>33 || !h->x264_build)))){
1104 a= pack16to32(mv[0][0],mv[0][1]);
1106 b= pack16to32(mv[1][0],mv[1][1]);
1108 a= pack16to32(mv[0][0],mv[0][1]);
1109 b= pack16to32(mv[1][0],mv[1][1]);
1111 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1112 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1114 for(i8=0; i8<4; i8++){
1115 const int x8 = i8&1;
1116 const int y8 = i8>>1;
1118 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1120 h->sub_mb_type[i8] = sub_mb_type;
/* start from the predicted vectors/refs, then zero individual vectors below */
1122 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1123 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1124 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1125 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1128 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1129 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1130 && (h->x264_build>33 || !h->x264_build)))){
/* use the list-0 co-located vectors when the co-located list-0 ref is 0, otherwise list-1 */
1131 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1132 if(IS_SUB_8X8(sub_mb_type)){
1133 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1134 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1136 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1138 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1141 for(i4=0; i4<4; i4++){
1142 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1143 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1145 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1147 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1153 }else{ /* direct temporal mv pred */
1154 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1155 const int *dist_scale_factor = h->dist_scale_factor;
/* interlaced macroblocks use the field variants of the mapping and scale tables */
1158 if(IS_INTERLACED(*mb_type)){
1159 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1160 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1161 dist_scale_factor = h->dist_scale_factor_field;
1163 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1164 /* FIXME assumes direct_8x8_inference == 1 */
1165 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1166 int mb_types_col[2];
1169 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1170 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1171 | (*mb_type & MB_TYPE_INTERLACED);
1172 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1174 if(IS_INTERLACED(*mb_type)){
1175 /* frame to field scaling */
1176 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1177 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1179 l1ref0 -= 2*h->b8_stride;
1180 l1ref1 -= 2*h->b8_stride;
1181 l1mv0 -= 4*h->b_stride;
1182 l1mv1 -= 4*h->b_stride;
1186 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1187 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1189 *mb_type |= MB_TYPE_16x8;
1191 *mb_type |= MB_TYPE_8x8;
1193 /* field to frame scaling */
1194 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1195 * but in MBAFF, top and bottom POC are equal */
1196 int dy = (s->mb_y&1) ? 1 : 2;
1198 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1199 l1ref0 += dy*h->b8_stride;
1200 l1ref1 += dy*h->b8_stride;
1201 l1mv0 += 2*dy*h->b_stride;
1202 l1mv1 += 2*dy*h->b_stride;
1205 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1207 *mb_type |= MB_TYPE_16x16;
1209 *mb_type |= MB_TYPE_8x8;
1212 for(i8=0; i8<4; i8++){
1213 const int x8 = i8&1;
1214 const int y8 = i8>>1;
1216 const int16_t (*l1mv)[2]= l1mv0;
1218 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1220 h->sub_mb_type[i8] = sub_mb_type;
/* list-1 reference index is always 0 in temporal direct mode */
1222 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
/* intra co-located block: ref 0 and zero vectors for both lists */
1223 if(IS_INTRA(mb_types_col[y8])){
1224 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1225 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1226 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1230 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1232 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1234 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1237 scale = dist_scale_factor[ref0];
1238 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1241 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1242 int my_col = (mv_col[1]<<y_shift)/2;
/* L0 vector = co-located vector scaled by scale/256 (rounded);
 * L1 vector = L0 vector minus the co-located vector */
1243 int mx = (scale * mv_col[0] + 128) >> 8;
1244 int my = (scale * my_col + 128) >> 8;
1245 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1246 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1253 /* one-to-one mv scaling */
1255 if(IS_16X16(*mb_type)){
1258 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1259 if(IS_INTRA(mb_type_col)){
/* map the co-located reference (list 0 if valid, else list 1) to a current list-0 index */
1262 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1263 : map_col_to_list0[1][l1ref1[0]];
1264 const int scale = dist_scale_factor[ref0];
1265 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1267 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1268 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1270 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1271 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1273 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1274 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1275 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1277 for(i8=0; i8<4; i8++){
1278 const int x8 = i8&1;
1279 const int y8 = i8>>1;
1281 const int16_t (*l1mv)[2]= l1mv0;
1283 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1285 h->sub_mb_type[i8] = sub_mb_type;
1286 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 if(IS_INTRA(mb_type_col)){
1288 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1289 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1290 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1294 ref0 = l1ref0[x8 + y8*h->b8_stride];
1296 ref0 = map_col_to_list0[0][ref0];
1298 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1301 scale = dist_scale_factor[ref0];
1303 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* one scaled vector per 8x8 block, or one per 4x4 block otherwise */
1304 if(IS_SUB_8X8(sub_mb_type)){
1305 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1306 int mx = (scale * mv_col[0] + 128) >> 8;
1307 int my = (scale * mv_col[1] + 128) >> 8;
1308 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1309 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1311 for(i4=0; i4<4; i4++){
1312 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1313 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1314 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1315 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1316 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1317 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock caches of the current macroblock back into the
 * picture-wide tables: mv_cache into current_picture.motion_val, mvd_cache
 * into mvd_table (CABAC only) and ref_cache into current_picture.ref_index.
 * For 8x8 macroblocks in B slices with CABAC it also records in direct_table
 * which sub-blocks are direct.
 *
 * @param h       decoder context
 * @param mb_type type of the current macroblock, used to decide which lists
 *                are in use and whether the macroblock is skipped / 8x8
 */
1324 static inline void write_back_motion(H264Context *h, int mb_type){
1325 MpegEncContext * const s = &h->s;
1326 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1327 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* list 0 unused: mark its four 8x8 reference indices as LIST_NOT_USED */
1330 if(!USES_LIST(mb_type, 0))
1331 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1333 for(list=0; list<h->list_count; list++){
1335 if(!USES_LIST(mb_type, list))
/* each 64-bit store copies two adjacent motion vectors (2 x 2 x int16) of row y */
1339 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1340 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1342 if( h->pps.cabac ) {
/* skipped macroblocks get their 4x4 mvd_table area zeroed */
1343 if(IS_SKIP(mb_type))
1344 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1347 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1348 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block, taken from the first 4x4 entry of each block */
1353 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1354 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1355 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1356 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1357 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1361 if(h->slice_type == B_TYPE && h->pps.cabac){
1362 if(IS_8X8(mb_type)){
/* entries for sub-blocks 1..3 are written here; entry 0 is not touched in these lines */
1363 uint8_t *direct_table = &h->direct_table[b8_xy];
1364 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1365 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1366 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1372 * Decodes a network abstraction layer unit.
1373 * @param consumed is the number of bytes used as input
1374 * @param length is the length of the array
1375 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1376 * @returns decoded bytes, might be src+1 if no escapes
1378 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
/* Parses the one-byte NAL header into h->nal_ref_idc / h->nal_unit_type, then
 * looks for 00 00 0x (x <= 3) byte sequences in the payload. If none is found
 * the input is reported as-is; otherwise the payload is copied into one of the
 * context's rbsp buffers. */
1383 // src[0]&0x80; //forbidden bit
/* header byte: bits 5-6 are nal_ref_idc, the low five bits are nal_unit_type */
1384 h->nal_ref_idc= src[0]>>5;
1385 h->nal_unit_type= src[0]&0x1F;
1389 for(i=0; i<length; i++)
1390 printf("%2X ", src[i]);
/* scan two bytes at a time for a zero byte; on a hit, step back one byte if the
 * previous byte is zero too, so that i points at the first zero of the pair */
1392 for(i=0; i+1<length; i+=2){
1393 if(src[i]) continue;
1394 if(i>0 && src[i-1]==0) i--;
1395 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1397 /* startcode, so we must be past the end */
1404 if(i>=length-1){ //no escaped 0
1405 *dst_length= length;
1406 *consumed= length+1; //+1 for the header
1410 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* request at least length bytes for the unescaped copy */
1411 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1412 dst= h->rbsp_buffer[bufidx];
1418 //printf("decoding esc\n");
1421 //remove escapes (very rare 1:2^22)
1422 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1423 if(src[si+2]==3){ //escape
1428 }else //next start code
/* ordinary byte: copy through unchanged */
1432 dst[di++]= src[si++];
1436 *consumed= si + 1;//+1 for the header
1437 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1442 * identifies the exact end of the bitstream
1443 * @return the length of the trailing, or 0 if damaged
1445 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
/* trace output of the value v being inspected (printed in hexadecimal) */
1449 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1459 * IDCT-transforms the 16 DC values and dequantizes them.
1460 * @param qp quantization parameter
1462 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* Two-pass butterfly over the 16 DC coefficients stored in block at the
 * positions given by x_offset/y_offset. The first pass reads from block, the
 * second reads from temp and writes the results, scaled by qmul with rounding
 * ((v*qmul + 128) >> 8), back into block.
 * The parameter qp is not referenced in the lines below. */
1465 int temp[16]; //FIXME check if this is a good idea
1466 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1467 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1469 //memset(block, 64, 2*256);
/* first pass: sums and differences of coefficient pairs taken from block */
1472 const int offset= y_offset[i];
1473 const int z0= block[offset+stride*0] + block[offset+stride*4];
1474 const int z1= block[offset+stride*0] - block[offset+stride*4];
1475 const int z2= block[offset+stride*1] - block[offset+stride*5];
1476 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly down column i of temp */
1485 const int offset= x_offset[i];
1486 const int z0= temp[4*0+i] + temp[4*2+i];
1487 const int z1= temp[4*0+i] - temp[4*2+i];
1488 const int z2= temp[4*1+i] - temp[4*3+i];
1489 const int z3= temp[4*1+i] + temp[4*3+i];
1491 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1492 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1493 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1494 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1500 * DCT-transforms the 16 DC values.
1501 * @param qp quantization parameter ??? FIXME
1503 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
/* Forward counterpart of the DC transform: the same two-pass butterfly over the
 * 16 DC positions of block, but the second pass halves each result (>> 1)
 * instead of scaling by a quantizer. */
1504 // const int qmul= dequant_coeff[qp][0];
1506 int temp[16]; //FIXME check if this is a good idea
1507 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1508 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: sums and differences of coefficient pairs taken from block */
1511 const int offset= y_offset[i];
1512 const int z0= block[offset+stride*0] + block[offset+stride*4];
1513 const int z1= block[offset+stride*0] - block[offset+stride*4];
1514 const int z2= block[offset+stride*1] - block[offset+stride*5];
1515 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: same butterfly down column i of temp, results written back to block */
1524 const int offset= x_offset[i];
1525 const int z0= temp[4*0+i] + temp[4*2+i];
1526 const int z1= temp[4*0+i] - temp[4*2+i];
1527 const int z2= temp[4*1+i] - temp[4*3+i];
1528 const int z3= temp[4*1+i] + temp[4*3+i];
1530 block[stride*0 +offset]= (z0 + z3)>>1;
1531 block[stride*2 +offset]= (z1 + z2)>>1;
1532 block[stride*8 +offset]= (z1 - z2)>>1;
1533 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Transforms the four chroma DC values of a 2x2 arrangement in place and
 * scales each result by qmul, followed by a right shift of 7.
 * The four values live at block[stride*{0,1} + xStride*{0,1}].
 *
 * @param block coefficient buffer, modified in place
 * @param qp    not referenced in the lines below
 * @param qmul  multiplier applied to every output value
 */
1541 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1542 const int stride= 16*2;
1543 const int xStride= 16;
/* load the 2x2 DC values */
1546 a= block[stride*0 + xStride*0];
1547 b= block[stride*0 + xStride*1];
1548 c= block[stride*1 + xStride*0];
1549 d= block[stride*1 + xStride*1];
/* a, b, c and e are intermediate butterfly terms at this point */
1556 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1557 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1558 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1559 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Transforms the four chroma DC values of a 2x2 arrangement in place, without
 * any scaling. The four values live at block[stride*{0,1} + xStride*{0,1}].
 *
 * @param block coefficient buffer, modified in place
 */
1563 static void chroma_dc_dct_c(DCTELEM *block){
1564 const int stride= 16*2;
1565 const int xStride= 16;
/* load the 2x2 DC values */
1568 a= block[stride*0 + xStride*0];
1569 b= block[stride*0 + xStride*1];
1570 c= block[stride*1 + xStride*0];
1571 d= block[stride*1 + xStride*1];
/* a, b, c and e are intermediate butterfly terms at this point */
1578 block[stride*0 + xStride*0]= (a+c);
1579 block[stride*0 + xStride*1]= (e+b);
1580 block[stride*1 + xStride*0]= (a-c);
1581 block[stride*1 + xStride*1]= (e-b);
1586 * gets the chroma qp.
1588 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
/* table lookup in the PPS: t selects the table, qscale (masked to its low 8 bits) the entry */
1589 return h->pps.chroma_qp_table[t][qscale & 0xff];
1592 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1593 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1594 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
/* Quantizes the coefficients of block using quant_coeff[qscale], visiting them
 * in scantable order, and returns last_non_zero. The rounding bias is a third
 * of the quantization step for intra blocks and a sixth otherwise. The DC
 * coefficient has two dedicated variants below that use different shifts
 * (QUANT_SHIFT-2 and QUANT_SHIFT+1). */
1596 const int * const quant_table= quant_coeff[qscale];
1597 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* (unsigned)(level + threshold1) > threshold2 is a single-comparison test for
 * level > threshold1 || level < -threshold1 */
1598 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1599 const unsigned int threshold2= (threshold1<<1);
/* DC variant using the table at qscale+18 and a shift of QUANT_SHIFT-2 */
1605 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1606 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1607 const unsigned int dc_threshold2= (dc_threshold1<<1);
1609 int level= block[0]*quant_coeff[qscale+18][0];
1610 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1612 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1615 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1618 // last_non_zero = i;
/* DC variant using quant_table and a shift of QUANT_SHIFT+1 */
1623 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1624 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1625 const unsigned int dc_threshold2= (dc_threshold1<<1);
1627 int level= block[0]*quant_table[0];
1628 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1630 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1633 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1636 // last_non_zero = i;
/* remaining coefficients, addressed through the scan table */
1649 const int j= scantable[i];
1650 int level= block[j]*quant_table[j];
1652 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1653 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1654 if(((unsigned)(level+threshold1))>threshold2){
1656 level= (bias + level)>>QUANT_SHIFT;
1659 level= (bias - level)>>QUANT_SHIFT;
1668 return last_non_zero;
/**
 * Motion-compensates one partition from a single reference picture.
 *
 * The vector is read from h->mv_cache[list][scan8[n]] and offset by
 * src_x_offset/src_y_offset (scaled by 8). Luma is predicted with
 * qpix_op[luma_xy], where luma_xy encodes the two fractional bits of each
 * vector component; chroma is predicted with chroma_op using the low three
 * bits. When the referenced area reaches beyond the picture plus the allowed
 * edge, the source is first rebuilt in s->edge_emu_buffer via
 * ff_emulated_edge_mc().
 *
 * @param pic           reference picture to predict from
 * @param n             index into scan8[] selecting the partition's vector
 * @param delta         offset of the second luma call (dest_y + delta)
 * @param chroma_height height passed to chroma_op
 */
1671 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1672 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1673 int src_x_offset, int src_y_offset,
1674 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1675 MpegEncContext * const s = &h->s;
1676 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1677 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1678 const int luma_xy= (mx&3) + ((my&3)<<2);
1679 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1680 uint8_t * src_cb, * src_cr;
1681 int extra_width= h->emu_edge_width;
1682 int extra_height= h->emu_edge_height;
1684 const int full_mx= mx>>2;
1685 const int full_my= my>>2;
1686 const int pic_width = 16*s->mb_width;
1687 const int pic_height = 16*s->mb_height >> MB_FIELD;
1689 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* a non-zero fractional part (low three bits) shrinks the usable edge by 3 pixels */
1692 if(mx&7) extra_width -= 3;
1693 if(my&7) extra_height -= 3;
/* source block would read outside picture + edge: build it in the emulation buffer */
1695 if( full_mx < 0-extra_width
1696 || full_my < 0-extra_height
1697 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1698 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1699 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1700 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1704 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1706 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* luma only when gray decoding is enabled and requested */
1709 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1712 // chroma offset when predicting from a field of opposite parity
1713 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1714 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1716 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1717 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1720 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1721 src_cb= s->edge_emu_buffer;
1723 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1726 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1727 src_cr= s->edge_emu_buffer;
1729 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Unweighted motion compensation of one partition.
 *
 * Advances the destination pointers to the partition, converts the offsets to
 * picture coordinates, then calls mc_dir_part() for the list-0 and list-1
 * references. The operators start out as the "put" variants; chroma_op is
 * switched to chroma_avg between the two calls.
 *
 * @param x_offset,y_offset partition position inside the macroblock, in
 *                          chroma-sample units (luma pointers use twice it)
 * @param list0,list1       which reference lists the partition uses
 */
1732 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1733 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1734 int x_offset, int y_offset,
1735 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1736 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1737 int list0, int list1){
1738 MpegEncContext * const s = &h->s;
1739 qpel_mc_func *qpix_op= qpix_put;
1740 h264_chroma_mc_func chroma_op= chroma_put;
1742 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1743 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1744 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* from here on the offsets are relative to the picture, not the macroblock */
1745 x_offset += 8*s->mb_x;
1746 y_offset += 8*(s->mb_y >> MB_FIELD);
1749 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1750 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1751 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1752 qpix_op, chroma_op);
1755 chroma_op= chroma_avg;
1759 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1760 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1761 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1762 qpix_op, chroma_op);
/**
 * Weighted motion compensation of one partition.
 *
 * Bi-predicted case: the list-0 prediction is written to the destination and
 * the list-1 prediction to s->obmc_scratchpad; both are then blended with the
 * *_weight_avg functions, using implicit weights when h->use_weight == 2 and
 * the explicit luma/chroma weight and offset tables otherwise.
 *
 * Single-list case: the prediction is written to the destination and then
 * weighted in place with luma_weight_op, and with chroma_weight_op when
 * h->use_weight_chroma is set.
 *
 * @param x_offset,y_offset partition position inside the macroblock, in
 *                          chroma-sample units (luma pointers use twice it)
 * @param list0,list1       which reference lists the partition uses
 */
1766 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1767 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1768 int x_offset, int y_offset,
1769 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1770 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1771 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1772 int list0, int list1){
1773 MpegEncContext * const s = &h->s;
1775 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1776 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1777 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
/* from here on the offsets are relative to the picture, not the macroblock */
1778 x_offset += 8*s->mb_x;
1779 y_offset += 8*(s->mb_y >> MB_FIELD);
1782 /* don't optimize for luma-only case, since B-frames usually
1783 * use implicit weights => chroma too. */
/* scratchpad layout: Cb at offset 0, Cr at offset 8, luma after 8 chroma lines */
1784 uint8_t *tmp_cb = s->obmc_scratchpad;
1785 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1786 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1787 int refn0 = h->ref_cache[0][ scan8[n] ];
1788 int refn1 = h->ref_cache[1][ scan8[n] ];
1790 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1791 dest_y, dest_cb, dest_cr,
1792 x_offset, y_offset, qpix_put, chroma_put);
1793 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1794 tmp_y, tmp_cb, tmp_cr,
1795 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: the two weights sum to 64, log2 denominator 5, no offset */
1797 if(h->use_weight == 2){
1798 int weight0 = h->implicit_weight[refn0][refn1];
1799 int weight1 = 64 - weight0;
1800 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1801 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1802 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit weighting: per-reference weights, and the sum of both lists' offsets */
1804 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1805 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1806 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1807 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1808 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1809 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1810 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1811 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1812 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* single-list prediction: predict, then weight in place */
1815 int list = list1 ? 1 : 0;
1816 int refn = h->ref_cache[list][ scan8[n] ];
1817 Picture *ref= &h->ref_list[list][refn];
1818 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1819 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1820 qpix_put, chroma_put);
1822 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1823 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1824 if(h->use_weight_chroma){
1825 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1827 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Motion-compensates one partition, choosing between the weighted and the
 * standard path.
 *
 * The weighted path is taken when explicit weighting is active
 * (h->use_weight == 1), or when implicit weighting is active
 * (h->use_weight == 2), both lists are used and the implicit weight for the
 * pair of references differs from 32. Everything else goes through
 * mc_part_std().
 *
 * @param weight_op  weight function table; entries 0 and 3 are passed on as
 *                   the luma and chroma operators
 * @param weight_avg bi-weight function table, indexed the same way
 */
1833 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1834 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1835 int x_offset, int y_offset,
1836 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1837 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1838 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1839 int list0, int list1){
1840 if((h->use_weight==2 && list0 && list1
1841 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1842 || h->use_weight==1)
1843 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1844 x_offset, y_offset, qpix_put, chroma_put,
1845 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1847 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1848 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Issues prefetches into the reference picture selected by the first cached
 * reference index of the given list, at a position derived from the first
 * cached motion vector and the current macroblock position.
 *
 * @param h    decoder context
 * @param list reference list (0 or 1) whose cache entries are used
 */
1851 static inline void prefetch_motion(H264Context *h, int list){
1852 /* fetch pixels for estimated mv 4 macroblocks ahead
1853 * optimized for 64byte cache lines */
1854 MpegEncContext * const s = &h->s;
1855 const int refn = h->ref_cache[list][scan8[0]];
/* integer-pel part of the vector plus the macroblock origin */
1857 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1858 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1859 uint8_t **src= h->ref_list[list][refn].data;
/* the row offset rotates with mb_x (every 4 macroblocks for luma, every 8 for chroma) */
1860 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1861 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1862 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
/* stride src[2]-src[1] makes the two prefetched lines hit the Cb and the Cr plane */
1863 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs inter prediction for the current macroblock.
 *
 * Dispatches on the macroblock partitioning (16x16, 16x8, 8x16, otherwise 8x8
 * with per-sub-block 8x8 / 8x4 / 4x8 / 4x4 handling) and calls mc_part() once
 * per partition. prefetch_motion() is called for list 0 before and for list 1
 * after the prediction.
 *
 * @param dest_y,dest_cb,dest_cr destination planes of the macroblock
 * @param qpix_put,qpix_avg      luma operator tables, indexed by block size
 * @param chroma_put,chroma_avg  chroma operator tables, indexed by block size
 * @param weight_op,weight_avg   weighting function tables, indexed by partition shape
 */
1867 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1868 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1869 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1870 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1871 MpegEncContext * const s = &h->s;
1872 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1873 const int mb_type= s->current_picture.mb_type[mb_xy];
1875 assert(IS_INTER(mb_type));
1877 prefetch_motion(h, 0);
/* one 16x16 partition */
1879 if(IS_16X16(mb_type)){
1880 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1881 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1882 &weight_op[0], &weight_avg[0],
1883 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 partitions: vectors at scan8[0] and scan8[8], second one 4 chroma rows down */
1884 }else if(IS_16X8(mb_type)){
1885 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1886 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1887 &weight_op[1], &weight_avg[1],
1888 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1889 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1890 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1891 &weight_op[1], &weight_avg[1],
1892 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 partitions: vectors at scan8[0] and scan8[4], second one 4 chroma columns right */
1893 }else if(IS_8X16(mb_type)){
1894 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1895 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1896 &weight_op[2], &weight_avg[2],
1897 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1898 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1899 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1900 &weight_op[2], &weight_avg[2],
1901 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* four 8x8 sub-macroblocks, each with its own sub-partitioning */
1905 assert(IS_8X8(mb_type));
1908 const int sub_mb_type= h->sub_mb_type[i];
1910 int x_offset= (i&1)<<2;
1911 int y_offset= (i&2)<<1;
1913 if(IS_SUB_8X8(sub_mb_type)){
1914 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1915 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1916 &weight_op[3], &weight_avg[3],
1917 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1918 }else if(IS_SUB_8X4(sub_mb_type)){
1919 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1920 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1921 &weight_op[4], &weight_avg[4],
1922 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1923 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1924 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1925 &weight_op[4], &weight_avg[4],
1926 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1927 }else if(IS_SUB_4X8(sub_mb_type)){
1928 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1929 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1930 &weight_op[5], &weight_avg[5],
1931 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1932 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1933 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1934 &weight_op[5], &weight_avg[5],
1935 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
/* four 4x4 blocks per sub-macroblock */
1938 assert(IS_SUB_4X4(sub_mb_type));
1940 int sub_x_offset= x_offset + 2*(j&1);
1941 int sub_y_offset= y_offset + (j&2);
1942 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1943 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1944 &weight_op[6], &weight_avg[6],
1945 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951 prefetch_motion(h, 1);
/**
 * Builds the file-scope VLC tables (coefficient tokens, total zeros and run
 * lengths, each with its chroma DC or special variant) from the static
 * length/bits arrays via init_vlc().
 */
1954 static void decode_init_vlc(void){
/* NOTE(review): presumably a run-once guard so the tables are built only on the first call */
1955 static int done = 0;
1961 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1962 &chroma_dc_coeff_token_len [0], 1, 1,
1963 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1966 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1967 &coeff_token_len [i][0], 1, 1,
1968 &coeff_token_bits[i][0], 1, 1, 1);
1972 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1973 &chroma_dc_total_zeros_len [i][0], 1, 1,
1974 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1976 for(i=0; i<15; i++){
1977 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1978 &total_zeros_len [i][0], 1, 1,
1979 &total_zeros_bits[i][0], 1, 1, 1);
1983 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1984 &run_len [i][0], 1, 1,
1985 &run_bits[i][0], 1, 1, 1);
/* the seventh run table (index 6) has 16 codes and gets its own VLC */
1987 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1988 &run_len [6][0], 1, 1,
1989 &run_bits[6][0], 1, 1, 1);
/**
 * Releases the per-picture tables set up by alloc_tables(), the stored
 * SPS/PPS buffers, and the per-thread buffers (top_borders from
 * context_init(), obmc_scratchpad from frame_start()).
 * Every pointer is released through av_freep(), which is handed the address
 * of the pointer.
 * @param h decoder context whose tables are released
 */
1993 static void free_tables(H264Context *h){
1996 av_freep(&h->intra4x4_pred_mode);
1997 av_freep(&h->chroma_pred_mode_table);
1998 av_freep(&h->cbp_table);
1999 av_freep(&h->mvd_table[0]);
2000 av_freep(&h->mvd_table[1]);
2001 av_freep(&h->direct_table);
2002 av_freep(&h->non_zero_count);
2003 av_freep(&h->slice_table_base);
/* slice_table only points into slice_table_base (see alloc_tables()),
 * so it is reset rather than freed */
2004 h->slice_table= NULL;
2006 av_freep(&h->mb2b_xy);
2007 av_freep(&h->mb2b8_xy);
/* stored parameter sets */
2009 for(i = 0; i < MAX_SPS_COUNT; i++)
2010 av_freep(h->sps_buffers + i);
2012 for(i = 0; i < MAX_PPS_COUNT; i++)
2013 av_freep(h->pps_buffers + i);
/* buffers owned by each thread context
 * NOTE(review): any NULL check on hx is not visible in this listing */
2015 for(i = 0; i < h->s.avctx->thread_count; i++) {
2016 hx = h->thread_context[i];
2018 av_freep(&hx->top_borders[1]);
2019 av_freep(&hx->top_borders[0]);
2020 av_freep(&hx->s.obmc_scratchpad);
/**
 * Fills the two 8x8 dequantisation tables (52 QP values each) from
 * dequant8_coeff_init and the PPS 8x8 scaling matrices.
 * @param h decoder context; reads h->pps.scaling_matrix8, writes
 *          h->dequant8_coeff / h->dequant8_buffer
 */
2024 static void init_dequant8_coeff_table(H264Context *h){
/* when the selected 8x8 IDCT is not the C reference function the
 * coefficients are stored with row and column swapped */
2026 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2027 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2028 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2030 for(i=0; i<2; i++ ){
/* identical scaling matrices: let table 1 alias buffer 0 instead of
 * computing the same values again */
2031 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2032 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2036 for(q=0; q<52; q++){
/* q is split into ff_div6[q] (left-shift amount) and ff_rem6[q]
 * (row of the init table) */
2037 int shift = ff_div6[q];
2038 int idx = ff_rem6[q];
/* (x>>3)|((x&7)<<3) swaps the row and column bits of the 0..63 index */
2040 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2041 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2042 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Fills the six 4x4 dequantisation tables (52 QP values each) from
 * dequant4_coeff_init and the PPS 4x4 scaling matrices.
 * @param h decoder context; reads h->pps.scaling_matrix4, writes
 *          h->dequant4_coeff / h->dequant4_buffer
 */
2047 static void init_dequant4_coeff_table(H264Context *h){
/* when the selected 4x4 IDCT is not the C reference function the
 * coefficients are stored with row and column swapped */
2049 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2050 for(i=0; i<6; i++ ){
2051 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* if scaling matrix j equals matrix i, table i aliases buffer j
 * (the loop over j is not visible in this listing) */
2053 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2054 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2061 for(q=0; q<52; q++){
/* same div6/rem6 split as the 8x8 variant, with 2 extra bits of shift */
2062 int shift = ff_div6[q] + 2;
2063 int idx = ff_rem6[q];
/* (x>>2)|((x<<2)&0xF) swaps the row and column bits of the 0..15 index */
2065 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2066 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2067 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)builds all dequantisation tables for the current PPS/SPS.
 * The 4x4 tables are always built; the 8x8 tables only when the PPS enables
 * the 8x8 transform.
 * @param h decoder context
 */
2072 static void init_dequant_tables(H264Context *h){
2074 init_dequant4_coeff_table(h);
2075 if(h->pps.transform_8x8_mode)
2076 init_dequant8_coeff_table(h);
/* with transform bypass enabled in the SPS, the entries for q == 0 are
 * overwritten with the constant 1<<6
 * (the loops over i and x are not visible in this listing) */
2077 if(h->sps.transform_bypass){
2080 h->dequant4_coeff[i][0][x] = 1<<6;
2081 if(h->pps.transform_8x8_mode)
2084 h->dequant8_coeff[i][0][x] = 1<<6;
2091 * needs width/height
/**
 * Allocates the per-macroblock side tables shared by all thread contexts
 * and builds the macroblock -> block index maps.
 * Sizes are derived from s->mb_stride / s->mb_height, so the picture
 * dimensions must already be known.
 * NOTE(review): CHECKED_ALLOCZ is defined elsewhere; presumably it
 * zero-allocates and bails out to a failure path when allocation fails --
 * that path and the return statements are not visible in this listing.
 * @param h decoder context
 */
2093 static int alloc_tables(H264Context *h){
2094 MpegEncContext * const s = &h->s;
/* one macroblock row more than the picture height */
2095 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2098 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2104 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2105 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2106 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2107 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* every slice table byte starts as 0xFF; slice_table itself points
 * two rows plus one entry into the base allocation, so small negative
 * offsets from it stay inside the buffer */
2109 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2110 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2112 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2113 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map each macroblock index to the index of its first 4x4 block
 * (4 per MB in each direction) and first 8x8 block (2 per MB) */
2114 for(y=0; y<s->mb_height; y++){
2115 for(x=0; x<s->mb_width; x++){
2116 const int mb_xy= x + y*s->mb_stride;
2117 const int b_xy = 4*x + 4*y*h->b_stride;
2118 const int b8_xy= 2*x + 2*y*h->b8_stride;
2120 h->mb2b_xy [mb_xy]= b_xy;
2121 h->mb2b8_xy[mb_xy]= b8_xy;
/* the scratchpad is allocated lazily in frame_start(), once linesize is known */
2125 s->obmc_scratchpad = NULL;
/* build the dequant tables only if they have not been set up yet */
2127 if(!h->dequant4_coeff[0])
2128 init_dequant_tables(h);
2137 * Mimic alloc_tables(), but for every context thread.
/**
 * Makes dst use the same per-macroblock tables as src by copying the table
 * pointers (no data is duplicated), then initialises dst's own intra
 * prediction function table.
 * @param dst context receiving the shared table pointers
 * @param src context that owns the tables
 */
2139 static void clone_tables(H264Context *dst, H264Context *src){
2140 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2141 dst->non_zero_count = src->non_zero_count;
2142 dst->slice_table = src->slice_table;
2143 dst->cbp_table = src->cbp_table;
2144 dst->mb2b_xy = src->mb2b_xy;
2145 dst->mb2b8_xy = src->mb2b8_xy;
2146 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2147 dst->mvd_table[0] = src->mvd_table[0];
2148 dst->mvd_table[1] = src->mvd_table[1];
2149 dst->direct_table = src->direct_table;
/* the scratchpad is not shared; frame_start() allocates one per context */
2151 dst->s.obmc_scratchpad = NULL;
2152 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2157 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocates the two top-border line buffers of one context.
 * Each macroblock column gets 16+8+8 bytes; backup_mb_border() stores luma
 * at offset 0, Cb at offset 16 and Cr at offset 24.
 * NOTE(review): the success return and the failure label targeted by
 * CHECKED_ALLOCZ are not visible in this listing.
 * @param h context to set up
 * @return -1 on the visible (failure) path
 */
2159 static int context_init(H264Context *h){
2160 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2161 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2165 return -1; // free_tables will clean up for us
/**
 * Initialisation shared by the users of H264Context: copies picture size
 * and codec id from the AVCodecContext, sets up the intra prediction
 * functions and resets the scaling matrices.
 * @param h decoder context; h->s.avctx must already be set
 */
2168 static void common_init(H264Context *h){
2169 MpegEncContext * const s = &h->s;
2171 s->width = s->avctx->width;
2172 s->height = s->avctx->height;
2173 s->codec_id= s->avctx->codec->id;
2175 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1: presumably "no PPS has had dequant tables built yet" -- confirm
 * against the code that reads dequant_coeff_pps */
2177 h->dequant_coeff_pps= -1;
2178 s->unrestricted_mv=1;
2179 s->decode=1; //FIXME
/* every entry of all six 4x4 and both 8x8 scaling matrices becomes 16 */
2181 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2182 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * Decoder init callback: applies the MPEG-video decoder defaults, selects
 * H.264 output format, quarter-sample motion and YUV 4:2:0 output, and
 * registers this context as thread context 0.
 * NOTE(review): several statements, including the body of the extradata
 * branch and the return, are not visible in this listing.
 * @param avctx codec context; avctx->priv_data is the H264Context
 */
2185 static int decode_init(AVCodecContext *avctx){
2186 H264Context *h= avctx->priv_data;
2187 MpegEncContext * const s = &h->s;
2189 MPV_decode_defaults(s);
2194 s->out_format = FMT_H264;
2195 s->workaround_bugs= avctx->workaround_bugs;
2198 // s->decode_mb= ff_h263_decode_mb;
2199 s->quarter_sample = 1;
2201 avctx->pix_fmt= PIX_FMT_YUV420P;
/* non-empty extradata whose first byte is 1 is handled specially
 * (presumably an avcC configuration record -- confirm) */
2205 if(avctx->extradata_size > 0 && avctx->extradata &&
2206 *(char *)avctx->extradata == 1){
/* the main context doubles as the first thread context */
2213 h->thread_context[0] = h;
/**
 * Per-frame setup: starts the frame in the shared MPEG-video layer and the
 * error resilience code, recomputes the block offset tables for the current
 * line sizes, makes sure every thread context has a scratchpad, and resets
 * the slice table when needed.
 * NOTE(review): the return statements are not visible in this listing.
 * @param h decoder context
 */
2217 static int frame_start(H264Context *h){
2218 MpegEncContext * const s = &h->s;
2221 if(MPV_frame_start(s, s->avctx) < 0)
2223 ff_er_frame_start(s);
2225 * MPV_frame_start uses pict_type to derive key_frame.
2226 * This is incorrect for H.264; IDR markings must be used.
2227 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2228 * See decode_nal_units().
2230 s->current_picture_ptr->key_frame= 0;
2232 assert(s->linesize && s->uvlinesize);
/* luma: entries 0..15 use a row step of 4*linesize; entries 24.. use
 * 8*linesize and are the ones hl_decode_mb_internal() selects for field
 * macroblocks */
2234 for(i=0; i<16; i++){
2235 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2236 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma: Cb (16+i) and Cr (20+i) share the same offsets, again in a
 * 4*uvlinesize and an 8*uvlinesize variant */
2239 h->block_offset[16+i]=
2240 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2241 h->block_offset[24+16+i]=
2242 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2245 /* can't be in alloc_tables because linesize isn't known there.
2246 * FIXME: redo bipred weight to not require extra buffer? */
/* NOTE(review): the av_malloc() result is stored without a check here */
2247 for(i = 0; i < s->avctx->thread_count; i++)
2248 if(!h->thread_context[i]->s.obmc_scratchpad)
2249 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2251 /* some macroblocks will be accessed before they're available */
2252 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2253 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2255 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/**
 * Saves border samples of the current macroblock into h->left_border and
 * h->top_borders[0][mb_x]: column 15 of luma and column 7 of each chroma
 * plane go to left_border, and one 16-sample luma row plus one 8-sample row
 * per chroma plane go to top_borders.
 * NOTE(review): the adjustment of src_y matching the visible
 * `src_cb -= uvlinesize` is not visible in this listing; with it, the rows
 * read at 16*linesize / 8*uvlinesize would be the last rows of the
 * macroblock -- confirm.
 * @param simple non-zero skips the CODEC_FLAG_GRAY test, so chroma is
 *               always saved
 */
2259 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2260 MpegEncContext * const s = &h->s;
2264 src_cb -= uvlinesize;
2265 src_cr -= uvlinesize;
2267 // There are two lines saved, the line above the top macroblock of a pair,
2268 // and the line above the bottom macroblock
/* NOTE(review): only top_borders[0] is written in the visible part of
 * this function; the remark above reads as if it belongs to
 * backup_pair_border() */
/* slot 0 keeps the previously saved top-row sample of column 15 */
2269 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2270 for(i=1; i<17; i++){
2271 h->left_border[i]= src_y[15+i* linesize];
/* 16 luma samples copied as two 64-bit words */
2274 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2275 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2277 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma: Cb occupies left_border[17..], Cr left_border[17+9..] */
2278 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2279 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2281 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2282 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2284 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchanges the samples directly left of and above the current macroblock
 * in the picture with the copies kept in h->left_border / h->top_borders[0].
 * hl_decode_mb_internal() calls it with xchg=1 before intra prediction and
 * with xchg=0 afterwards.
 * NOTE(review): the body of the XCHG macro and the conditions guarding the
 * individual exchanges are not visible in this listing.
 * @param xchg   passed to XCHG for the left column and the first 8 top samples
 * @param simple non-zero skips the CODEC_FLAG_GRAY test for chroma
 */
2289 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2290 MpegEncContext * const s = &h->s;
/* deblocking_filter == 2: a neighbour counts only if its slice_table
 * entry equals the current macroblock's */
2297 if(h->deblocking_filter == 2) {
2298 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2299 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2300 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
/* otherwise only the picture edge decides */
2302 deblock_left = (s->mb_x > 0);
2303 deblock_top = (s->mb_y > 0);
/* step to the sample one row up and one column left of the macroblock */
2306 src_y -= linesize + 1;
2307 src_cb -= uvlinesize + 1;
2308 src_cr -= uvlinesize + 1;
2310 #define XCHG(a,b,t,xchg)\
2317 for(i = !deblock_top; i<17; i++){
2318 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* top luma row: samples 0..7, 8..15, and the first 8 samples of the
 * next macroblock column when one exists */
2323 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2324 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2325 if(s->mb_x+1 < s->mb_width){
2326 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2330 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left columns (Cb at +17, Cr at +17+9) and top rows */
2332 for(i = !deblock_top; i<9; i++){
2333 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2334 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2338 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2339 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Macroblock-pair variant of backup_mb_border(): saves the right-most
 * column of a 16x32 pair into h->left_border and two rows into
 * h->top_borders[0] and h->top_borders[1].
 * After the pointers are moved up two rows, rows 32/33 (luma) and 16/17
 * (chroma) are the last two rows of the pair.
 */
2344 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2345 MpegEncContext * const s = &h->s;
2348 src_y -= 2 * linesize;
2349 src_cb -= 2 * uvlinesize;
2350 src_cr -= 2 * uvlinesize;
2352 // There are two lines saved, the line above the top macroblock of a pair,
2353 // and the line above the bottom macroblock
/* slots 0 and 1 keep the previously saved top-row samples of column 15 */
2354 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2355 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2356 for(i=2; i<34; i++){
2357 h->left_border[i]= src_y[15+i* linesize];
/* second-to-last row goes to top_borders[0], last row to top_borders[1] */
2360 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2361 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2362 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2363 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2365 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma: Cb occupies left_border[34..], Cr left_border[34+18..] */
2366 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2367 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2368 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2369 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2370 for(i=2; i<18; i++){
2371 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2372 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2374 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2375 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2376 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2377 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * Macroblock-pair variant of xchg_mb_border(): exchanges the column left of
 * and the two rows above a 16x32 pair with the copies kept in
 * h->left_border and h->top_borders[0..1].
 * NOTE(review): the body of the XCHG macro and the conditions guarding the
 * individual exchanges are not visible in this listing.
 * @param xchg passed to XCHG for the left column and the first 8 top samples
 */
2381 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2382 MpegEncContext * const s = &h->s;
/* only the picture edge is considered here; the top test uses mb_y > 1
 * because a pair spans two macroblock rows */
2385 int deblock_left = (s->mb_x > 0);
2386 int deblock_top = (s->mb_y > 1);
2388 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* step to the sample two rows up and one column left of the pair */
2390 src_y -= 2 * linesize + 1;
2391 src_cb -= 2 * uvlinesize + 1;
2392 src_cr -= 2 * uvlinesize + 1;
2394 #define XCHG(a,b,t,xchg)\
2401 for(i = (!deblock_top)<<1; i<34; i++){
2402 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* two top luma rows (top_borders[0] and [1]), plus the first 8 samples of
 * the next macroblock column when one exists */
2407 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2408 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2409 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2410 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2411 if(s->mb_x+1 < s->mb_width){
2412 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2417 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left columns (Cb at +34, Cr at +34+18) and two top rows each */
2419 for(i = (!deblock_top) << 1; i<18; i++){
2420 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2421 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2425 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2427 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: intra prediction or motion compensation, then the
 * residual IDCT for luma and chroma, then (when enabled) border backup and
 * deblocking.
 * NOTE(review): many `else` / closing lines of this function are not
 * visible in this listing, so the comments below describe the visible
 * statements only, not the exact branch structure.
 * @param h      decoder context
 * @param simple non-zero selects the fast path: the MBAFF/field, PCM,
 *               gray-only and non-H.264 tests all short-circuit on it
 */
2433 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2434 MpegEncContext * const s = &h->s;
2435 const int mb_x= s->mb_x;
2436 const int mb_y= s->mb_y;
2437 const int mb_xy= mb_x + mb_y*s->mb_stride;
2438 const int mb_type= s->current_picture.mb_type[mb_xy];
2439 uint8_t *dest_y, *dest_cb, *dest_cr;
2440 int linesize, uvlinesize /*dct_offset*/;
2442 int *block_offset = &h->block_offset[0];
2443 const unsigned int bottom = mb_y & 1;
2444 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2445 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2446 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* top-left sample of this macroblock in each plane (16x16 luma, 8x8 chroma) */
2448 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2449 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2450 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2452 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2453 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: rows are two picture lines apart, so use doubled
 * strides and the second half of block_offset[] */
2455 if (!simple && MB_FIELD) {
2456 linesize = h->mb_linesize = s->linesize * 2;
2457 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2458 block_offset = &h->block_offset[24];
/* odd macroblock row: move up 15 luma / 7 chroma lines, which lands on
 * the line just below the start of the macroblock row above */
2459 if(mb_y&1){ //FIXME move out of this func?
2460 dest_y -= s->linesize*15;
2461 dest_cb-= s->uvlinesize*7;
2462 dest_cr-= s->uvlinesize*7;
/* rewrite the cached reference indices of every used list as
 * (16+ref)^(mb_y&1) */
2466 for(list=0; list<h->list_count; list++){
2467 if(!USES_LIST(mb_type, list))
2469 if(IS_16X16(mb_type)){
2470 int8_t *ref = &h->ref_cache[list][scan8[0]];
2471 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2473 for(i=0; i<16; i+=4){
2474 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2475 int ref = h->ref_cache[list][scan8[i]];
2477 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
/* frame macroblock: plain picture strides */
2483 linesize = h->mb_linesize = s->linesize;
2484 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2485 // dct_offset = s->linesize * 16;
/* pick the luma residual functions: plain pixel add for transform
 * bypass, otherwise the 8x8 or 4x4 IDCT */
2488 if(transform_bypass){
2490 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2491 }else if(IS_8x8DCT(mb_type)){
2492 idct_dc_add = s->dsp.h264_idct8_dc_add;
2493 idct_add = s->dsp.h264_idct8_add;
2495 idct_dc_add = s->dsp.h264_idct_dc_add;
2496 idct_add = s->dsp.h264_idct_add;
/* MBAFF + deblocking + intra: exchange the borders of the whole pair
 * (xchg=1) before predicting; skipped for a bottom macroblock whose top
 * partner is intra as well */
2499 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2500 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2501 int mbt_y = mb_y&~1;
2502 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2503 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2504 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2505 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* PCM macroblock: samples are copied from h->mb block by block:
 * blocks 0..15 luma, 16..19 Cb, 20..23 Cr */
2508 if (!simple && IS_INTRA_PCM(mb_type)) {
2511 // The pixels are stored in h->mb array in the same order as levels,
2512 // copy them in output in the correct order.
2513 for(i=0; i<16; i++) {
2514 for (y=0; y<4; y++) {
2515 for (x=0; x<4; x++) {
2516 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2520 for(i=16; i<16+4; i++) {
2521 for (y=0; y<4; y++) {
2522 for (x=0; x<4; x++) {
2523 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2527 for(i=20; i<20+4; i++) {
2528 for (y=0; y<4; y++) {
2529 for (x=0; x<4; x++) {
2530 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2535 if(IS_INTRA(mb_type)){
/* non-MBAFF with deblocking: exchange the borders (xchg=1) before
 * predicting */
2536 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2537 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
/* chroma intra prediction; both planes use the same mode */
2539 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2540 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2541 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2544 if(IS_INTRA4x4(mb_type)){
2545 if(simple || !s->encoding){
/* 8x8 transform: predict and add the residual for each of the four
 * 8x8 blocks in turn */
2546 if(IS_8x8DCT(mb_type)){
2547 for(i=0; i<16; i+=4){
2548 uint8_t * const ptr= dest_y + block_offset[i];
2549 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2550 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2551 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2552 (h->topright_samples_available<<i)&0x4000, linesize);
/* a single non-zero coefficient sitting in the DC position takes the
 * DC-only add */
2554 if(nnz == 1 && h->mb[i*16])
2555 idct_dc_add(ptr, h->mb + i*16, linesize);
2557 idct_add(ptr, h->mb + i*16, linesize);
/* 4x4 transform: sixteen 4x4 blocks */
2561 for(i=0; i<16; i++){
2562 uint8_t * const ptr= dest_y + block_offset[i];
2564 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these two modes read the top-right samples; when they are not
 * available, sample 3 of the row above is replicated into a 4-byte
 * temporary */
2567 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2568 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2569 assert(mb_y || linesize <= block_offset[i]);
2570 if(!topright_avail){
2571 tr= ptr[3 - linesize]*0x01010101;
2572 topright= (uint8_t*) &tr;
2574 topright= ptr + 4 - linesize;
2578 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2579 nnz = h->non_zero_count_cache[ scan8[i] ];
2582 if(nnz == 1 && h->mb[i*16])
2583 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 idct_add(ptr, h->mb + i*16, linesize);
/* SVQ3-specific residual add */
2587 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 luma prediction followed by the luma DC dequant/transform
 * (H.264 or SVQ3 variant) */
2592 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2594 if(!transform_bypass)
2595 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2597 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
/* second border call after intra prediction, this time with xchg=0 */
2599 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2600 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* motion compensation via hl_motion() */
2602 hl_motion(h, dest_y, dest_cb, dest_cr,
2603 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2604 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2605 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except intra 4x4, which added its
 * residual block by block above */
2609 if(!IS_INTRA4x4(mb_type)){
2611 if(IS_INTRA16x16(mb_type)){
2612 for(i=0; i<16; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ])
2614 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2615 else if(h->mb[i*16])
2616 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* step of 4 visits one entry per 8x8 block, step of 1 every 4x4 block */
2619 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2620 for(i=0; i<16; i+=di){
2621 int nnz = h->non_zero_count_cache[ scan8[i] ];
2623 if(nnz==1 && h->mb[i*16])
2624 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* SVQ3 luma residual */
2631 for(i=0; i<16; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2633 uint8_t * const ptr= dest_y + block_offset[i];
2634 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: blocks 16..19 go to Cb and 20..23 to Cr, selected by
 * bit 2 of the block index */
2640 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2641 uint8_t *dest[2] = {dest_cb, dest_cr};
2642 if(transform_bypass){
2643 idct_add = idct_dc_add = s->dsp.add_pixels4;
2645 idct_add = s->dsp.h264_idct_add;
2646 idct_dc_add = s->dsp.h264_idct_dc_add;
/* chroma DC: dequant list 1/2 for intra macroblocks, 4/5 otherwise */
2647 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2648 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2651 for(i=16; i<16+8; i++){
2652 if(h->non_zero_count_cache[ scan8[i] ])
2653 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2654 else if(h->mb[i*16])
2655 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* SVQ3 chroma residual */
2658 for(i=16; i<16+8; i++){
2659 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2660 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2661 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2667 if(h->deblocking_filter) {
/* MBAFF: both macroblocks of a pair are filtered together once the
 * bottom one has been decoded; the locals below shadow the outer
 * mb_y/mb_xy and refer to the top macroblock of the pair */
2668 if (!simple && FRAME_MBAFF) {
2669 //FIXME try deblocking one mb at a time?
2670 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2671 const int mb_y = s->mb_y - 1;
2672 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2673 const int mb_xy= mb_x + mb_y*s->mb_stride;
2674 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2675 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2676 if (!bottom) return;
2677 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2678 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2679 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* if either macroblock is intra the pair borders are exchanged with
 * xchg=0; the pair's borders are then saved before filtering */
2681 if(IS_INTRA(mb_type_top | mb_type_bottom))
2682 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2684 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* filter the top macroblock: caches and chroma QPs are reloaded for it
 * first */
2688 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2689 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2690 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2691 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2692 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
/* then the bottom macroblock */
2695 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2696 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2697 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2698 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2699 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* non-MBAFF: save the unfiltered borders, then filter this macroblock */
2701 tprintf(h->s.avctx, "call filter_mb\n");
2702 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2703 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2704 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2710 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* Instantiates hl_decode_mb_internal() with simple=1, so the tests that
 * short-circuit on `simple` cost nothing on this path. */
2712 static void hl_decode_mb_simple(H264Context *h){
2713 hl_decode_mb_internal(h, 1);
2717 * Process a macroblock; this handles edge cases, such as interlacing.
/* Instantiates hl_decode_mb_internal() with simple=0 (all special cases
 * checked); marked av_noinline so it is kept as a separate function. */
2719 static void av_noinline hl_decode_mb_complex(H264Context *h){
2720 hl_decode_mb_internal(h, 0);
/**
 * Reconstructs the current macroblock, dispatching to the complex or the
 * simple variant of hl_decode_mb_internal().
 * NOTE(review): the statement that tests is_complex is not visible in this
 * listing.
 * @param h decoder context
 */
2723 static void hl_decode_mb(H264Context *h){
2724 MpegEncContext * const s = &h->s;
2725 const int mb_x= s->mb_x;
2726 const int mb_y= s->mb_y;
2727 const int mb_xy= mb_x + mb_y*s->mb_stride;
2728 const int mb_type= s->current_picture.mb_type[mb_xy];
/* anything the simple path does not handle: MBAFF, field macroblocks,
 * PCM, non-H.264 codecs, gray-only decoding, encoding */
2729 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2735 hl_decode_mb_complex(h);
2736 else hl_decode_mb_simple(h);
/**
 * Turns a frame Picture into a view of one of its fields, in place.
 * For each of the 4 planes the line size is doubled, and for the bottom
 * field the data pointer first advances by one line.
 * @param pic    picture to modify
 * @param parity field to select; also stored in pic->reference
 */
2739 static void pic_as_field(Picture *pic, const int parity){
2741 for (i = 0; i < 4; ++i) {
2742 if (parity == PICT_BOTTOM_FIELD)
2743 pic->data[i] += pic->linesize[i];
2744 pic->reference = parity;
2745 pic->linesize[i] *= 2;
/**
 * Produces in dest the field of src that has the requested parity.
 * NOTE(review): the copy of *src into *dest, the condition on `match` and
 * the return value are not visible in this listing; callers use the result
 * as "one field was output" (see split_field_half_ref_list()).
 * @param dest   output picture
 * @param src    source picture
 * @param parity field wanted; matches when src->reference has that bit set
 * @param id_add value added to dest->pic_id
 */
2749 static int split_field_copy(Picture *dest, Picture *src,
2750 int parity, int id_add){
2751 int match = !!(src->reference & parity);
2755 pic_as_field(dest, parity);
2757 dest->pic_id += id_add;
2764 * Split one reference list into field parts, interleaving by parity
2765 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2766 * set to look at the actual start of data for that field.
2768 * @param dest output list
2769 * @param dest_len maximum number of fields to put in dest
2770 * @param src the source reference list containing fields and/or field pairs
2771 * (aka short_ref/long_ref, or
2772 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2773 * @param src_len number of Picture's in source (pairs and unmatched fields)
2774 * @param parity the parity of the picture being decoded/needing
2775 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2776 * @return number of fields placed in dest
/* Alternates between same-parity and opposite-parity fields taken from src.
 * same_i / opp_i index the next candidate of each kind; out_i advances only
 * when split_field_copy() reports that a field was written.
 * NOTE(review): the index increments, the remaining arguments of the first
 * call, the loop exit and the return are not visible in this listing. */
2778 static int split_field_half_ref_list(Picture *dest, int dest_len,
2779 Picture *src, int src_len, int parity){
2780 int same_parity = 1;
2786 for (out_i = 0; out_i < dest_len; out_i += field_output) {
/* try a field of the requested parity; after a success the next
 * attempt uses the opposite parity */
2787 if (same_parity && same_i < src_len) {
2788 field_output = split_field_copy(dest + out_i, src + same_i,
2790 same_parity = !field_output;
/* opposite parity is PICT_FRAME - parity; after a success switch back */
2793 } else if (opp_i < src_len) {
2794 field_output = split_field_copy(dest + out_i, src + opp_i,
2795 PICT_FRAME - parity, 0);
2796 same_parity = field_output;
2808 * Split the reference frame list into a reference field list.
2809 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2810 * The input list contains both reference field pairs and
2811 * unmatched reference fields; it is ordered as spec describes
2812 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2813 * unmatched field pairs are also present. Conceptually this is equivalent
2814 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2816 * @param dest output reference list where ordered fields are to be placed
2817 * @param dest_len max number of fields to place at dest
2818 * @param src source reference list, as described above
2819 * @param src_len number of pictures (pairs and unmatched fields) in src
2820 * @param parity parity of field being currently decoded
2821 * (one of PICT_{TOP,BOTTOM}_FIELD)
2822 * @param long_i index into src array that holds first long reference picture,
2823 * or src_len if no long refs present.
/* Splits src in two passes: entries [0, long_i) first, then the entries
 * from long_i up to src_len, adding up the number of fields written.
 * NOTE(review): the lines between the two calls (which presumably advance
 * dest and reduce dest_len by the first result) and the return are not
 * visible in this listing. */
2825 static int split_field_ref_list(Picture *dest, int dest_len,
2826 Picture *src, int src_len,
2827 int parity, int long_i){
2829 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2833 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2834 src_len - long_i, parity);
2839 * fills the default_ref_list.
/**
 * Builds h->default_ref_list[] for the current slice from h->short_ref and
 * h->long_ref.
 * For field pictures the lists are first assembled in the local
 * field_entry_list and then converted with split_field_ref_list(); for
 * frames they are assembled directly in h->default_ref_list.
 * NOTE(review): several declarations, `else` lines and the return are not
 * visible in this listing.
 * @param h decoder context
 */
2841 static int fill_default_ref_list(H264Context *h){
2842 MpegEncContext * const s = &h->s;
2844 int smallest_poc_greater_than_current = -1;
2846 Picture sorted_short_ref[32];
2847 Picture field_entry_list[2][32];
2848 Picture *frame_list[2];
/* field picture: structure_sel = PICT_FRAME makes every `sel` test below
 * pass, and the lists are built in the temporary field_entry_list */
2850 if (FIELD_PICTURE) {
2851 structure_sel = PICT_FRAME;
2852 frame_list[0] = field_entry_list[0];
2853 frame_list[1] = field_entry_list[1];
/* frame picture: build straight into the final lists */
2856 frame_list[0] = h->default_ref_list[0];
2857 frame_list[1] = h->default_ref_list[1];
2860 if(h->slice_type==B_TYPE){
2867 /* sort frame according to poc in B slice */
/* selection sort: each pass picks the smallest POC above `limit` */
2868 for(out_i=0; out_i<h->short_ref_count; out_i++){
2870 int best_poc=INT_MAX;
2872 for(i=0; i<h->short_ref_count; i++){
2873 const int poc= h->short_ref[i]->poc;
2874 if(poc > limit && poc < best_poc){
/* NOTE(review): best_i's initial value is not visible here; the assert
 * implies INT_MIN means "nothing found". With asserts compiled out,
 * best_i is still used as an index into h->short_ref below. */
2880 assert(best_i != INT_MIN);
2883 sorted_short_ref[out_i]= *h->short_ref[best_i];
2884 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember the first sorted entry whose POC is >= the current picture's */
2885 if (-1 == smallest_poc_greater_than_current) {
2886 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2887 smallest_poc_greater_than_current = out_i;
2892 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2894 // find the largest poc
2895 for(list=0; list<2; list++){
2898 int step= list ? -1 : 1;
/* walk sorted_short_ref in direction `step`; whenever j leaves the
 * array it is restarted next to the pivot at
 * smallest_poc_greater_than_current + (step>>1) */
2900 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2902 while(j<0 || j>= h->short_ref_count){
2903 if(j != -99 && step == (list ? -1 : 1))
2906 j= smallest_poc_greater_than_current + (step>>1);
/* keep only entries for which reference|structure_sel is PICT_FRAME */
2908 sel = sorted_short_ref[j].reference | structure_sel;
2909 if(sel != PICT_FRAME) continue;
/* short-term entries use frame_num as pic_id */
2910 frame_list[list][index ]= sorted_short_ref[j];
2911 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2913 short_len[list] = index;
/* long-term references follow; their pic_id is the long-term index */
2915 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2917 if(h->long_ref[i] == NULL) continue;
2918 sel = h->long_ref[i]->reference | structure_sel;
2919 if(sel != PICT_FRAME) continue;
2921 frame_list[ list ][index ]= *h->long_ref[i];
2922 frame_list[ list ][index++].pic_id= i;
2927 for(list=0; list<2; list++){
/* convert the list to fields via split_field_ref_list() */
2929 len[list] = split_field_ref_list(h->default_ref_list[list],
2933 s->picture_structure,
2936 // swap the two first elements of L1 when L0 and L1 are identical
2937 if(list && len[0] > 1 && len[0] == len[1])
2938 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2940 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* zero the unused tail of the list */
2944 if(len[list] < h->ref_count[ list ])
2945 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* remaining visible code fills list 0 only (presumably the non-B branch):
 * short-term refs in stored order, then long-term refs */
2952 for(i=0; i<h->short_ref_count; i++){
2954 sel = h->short_ref[i]->reference | structure_sel;
2955 if(sel != PICT_FRAME) continue;
2956 frame_list[0][index ]= *h->short_ref[i];
2957 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2960 for(i = 0; i < 16; i++){
2962 if(h->long_ref[i] == NULL) continue;
2963 sel = h->long_ref[i]->reference | structure_sel;
2964 if(sel != PICT_FRAME) continue;
2965 frame_list[0][index ]= *h->long_ref[i];
2966 frame_list[0][index++].pic_id= i;
2970 index = split_field_ref_list(h->default_ref_list[0],
2971 h->ref_count[0], frame_list[0],
2972 index, s->picture_structure,
2975 if(index < h->ref_count[0])
2976 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
/* trace output of the resulting lists */
2979 for (i=0; i<h->ref_count[0]; i++) {
2980 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2982 if(h->slice_type==B_TYPE){
2983 for (i=0; i<h->ref_count[1]; i++) {
2984 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2991 static void print_short_term(H264Context *h);
2992 static void print_long_term(H264Context *h);
2995 * Extract structure information about the picture described by pic_num in
2996 * the current decoding context (frame or field). Note that pic_num is
2997 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2998 * @param pic_num picture number for which to extract structure information
2999 * @param structure one of PICT_XXX describing structure of picture
3001 * @return frame number (short term) or long term index of picture
3002 * described by pic_num
/* *structure starts out as the current picture structure and is flipped to
 * the opposite field by XOR with PICT_FRAME on the branch marked below.
 * NOTE(review): the condition selecting that branch and the computation of
 * the returned number are not visible in this listing. */
3004 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3005 MpegEncContext * const s = &h->s;
3007 *structure = s->picture_structure;
3010 /* opposite field */
3011 *structure ^= PICT_FRAME;
// Parse the reference picture list reordering commands of a slice header and
// apply them to h->ref_list[], which starts out as a copy of
// h->default_ref_list[]. I and SI slices return 0 right after the debug
// print. Every error path visible here reports through av_log().
3018 static int decode_ref_pic_list_reordering(H264Context *h){
3019 MpegEncContext * const s = &h->s;
3020 int list, index, pic_structure;
3022 print_short_term(h);
3024 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3026 for(list=0; list<h->list_count; list++){
// Start from the default ordering; the commands below only move entries.
3027 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
// One flag bit tells whether reordering commands follow for this list.
3029 if(get_bits1(&s->gb)){
// Running picture number prediction, seeded with the current picture number.
3030 int pred= h->curr_pic_num;
// index is the list slot the next command fills; the loop has no upper bound
// of its own and relies on the checks inside.
3032 for(index=0; ; index++){
3033 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3034 unsigned int pic_id;
3036 Picture *ref = NULL;
// NOTE(review): idc 3 presumably ends the command list; the statement guarded
// by this test is not visible in this listing.
3038 if(reordering_of_pic_nums_idc==3)
// A command for a slot beyond ref_count[list] is reported as an error.
3041 if(index >= h->ref_count[list]){
3042 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3046 if(reordering_of_pic_nums_idc<3){
// idc 0 and 1: short term picture, given as a difference to the prediction.
3047 if(reordering_of_pic_nums_idc<2){
3048 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3051 if(abs_diff_pic_num > h->max_pic_num){
3052 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
// idc 0 subtracts, idc 1 adds; the result is wrapped with max_pic_num - 1
// used as a bit mask.
3056 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3057 else pred+= abs_diff_pic_num;
3058 pred &= h->max_pic_num - 1;
// Split the predicted picture number into frame number and field structure.
3060 frame_num = pic_num_extract(h, pred, &pic_structure);
// Walk the short term list from its end looking for an existing picture with
// that frame number which is referenced for the requested field(s).
3062 for(i= h->short_ref_count-1; i>=0; i--){
3063 ref = h->short_ref[i];
3064 assert(ref->reference);
3065 assert(!ref->long_ref);
3066 if(ref->data[0] != NULL &&
3067 ref->frame_num == frame_num &&
3068 (ref->reference & pic_structure) &&
3069 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
// idc 2: long term picture, addressed directly by its picture number.
3076 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3078 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3081 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3084 ref = h->long_ref[long_idx];
3085 assert(!(ref && !ref->reference));
// The long term slot is only usable when it is filled and referenced for the
// requested field(s); pic_id is recorded on it for the duplicate search below.
3086 if(ref && (ref->reference & pic_structure)){
3087 ref->pic_id= pic_id;
3088 assert(ref->long_ref);
// No usable reference: report it and leave a zeroed entry in this slot.
3096 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3097 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
// Scan forward from index for an entry with the same long_ref flag and
// pic_id as ref; the scan never goes past the last slot of the list.
3099 for(i=index; i+1<h->ref_count[list]; i++){
3100 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
// Shift the entries between index and that position down by one, then store
// a copy of the picture at index.
3103 for(; i > index; i--){
3104 h->ref_list[list][i]= h->ref_list[list][i-1];
3106 h->ref_list[list][index]= *ref;
// NOTE(review): the condition guarding this call is not visible in this
// listing; presumably it applies to field pictures only.
3108 pic_as_field(&h->ref_list[list][index], pic_structure);
3112 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
// Entries that still have no picture data are replaced by the current picture.
3118 for(list=0; list<h->list_count; list++){
3119 for(index= 0; index < h->ref_count[list]; index++){
3120 if(!h->ref_list[list][index].data[0])
3121 h->ref_list[list][index]= s->current_picture;
// B slices that do not use spatial direct prediction get their distance scale
// factors computed; direct_ref_list_init() follows the conditional.
3125 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3126 direct_dist_scale_factor(h);
3127 direct_ref_list_init(h);
3131 static void fill_mbaff_ref_list(H264Context *h){
3133 for(list=0; list<2; list++){ //FIXME try list_count
3134 for(i=0; i<h->ref_count[list]; i++){
3135 Picture *frame = &h->ref_list[list][i];
3136 Picture *field = &h->ref_list[list][16+2*i];
3139 field[0].linesize[j] <<= 1;
3140 field[0].reference = PICT_TOP_FIELD;
3141 field[1] = field[0];
3143 field[1].data[j] += frame->linesize[j];
3144 field[1].reference = PICT_BOTTOM_FIELD;
3146 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3147 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3149 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3150 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3154 for(j=0; j<h->ref_count[1]; j++){
3155 for(i=0; i<h->ref_count[0]; i++)
3156 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3157 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3158 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3162 static int pred_weight_table(H264Context *h){
3163 MpegEncContext * const s = &h->s;
3165 int luma_def, chroma_def;
3168 h->use_weight_chroma= 0;
3169 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3170 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3171 luma_def = 1<<h->luma_log2_weight_denom;
3172 chroma_def = 1<<h->chroma_log2_weight_denom;
3174 for(list=0; list<2; list++){
3175 for(i=0; i<h->ref_count[list]; i++){
3176 int luma_weight_flag, chroma_weight_flag;
3178 luma_weight_flag= get_bits1(&s->gb);
3179 if(luma_weight_flag){
3180 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3181 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3182 if( h->luma_weight[list][i] != luma_def
3183 || h->luma_offset[list][i] != 0)
3186 h->luma_weight[list][i]= luma_def;
3187 h->luma_offset[list][i]= 0;
3190 chroma_weight_flag= get_bits1(&s->gb);
3191 if(chroma_weight_flag){
3194 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3195 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3196 if( h->chroma_weight[list][i][j] != chroma_def
3197 || h->chroma_offset[list][i][j] != 0)
3198 h->use_weight_chroma= 1;
3203 h->chroma_weight[list][i][j]= chroma_def;
3204 h->chroma_offset[list][i][j]= 0;
3208 if(h->slice_type != B_TYPE) break;
3210 h->use_weight= h->use_weight || h->use_weight_chroma;
3214 static void implicit_weight_table(H264Context *h){
3215 MpegEncContext * const s = &h->s;
3217 int cur_poc = s->current_picture_ptr->poc;
3219 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3220 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3222 h->use_weight_chroma= 0;
3227 h->use_weight_chroma= 2;
3228 h->luma_log2_weight_denom= 5;
3229 h->chroma_log2_weight_denom= 5;
3231 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3232 int poc0 = h->ref_list[0][ref0].poc;
3233 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3234 int poc1 = h->ref_list[1][ref1].poc;
3235 int td = av_clip(poc1 - poc0, -128, 127);
3237 int tb = av_clip(cur_poc - poc0, -128, 127);
3238 int tx = (16384 + (FFABS(td) >> 1)) / td;
3239 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3240 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3241 h->implicit_weight[ref0][ref1] = 32;
3243 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3245 h->implicit_weight[ref0][ref1] = 32;
3251 * Mark a picture as no longer needed for reference. The refmask
3252 * argument allows unreferencing of individual fields or the whole frame.
3253 * If the picture becomes entirely unreferenced, but is being held for
3254 * display purposes, it is marked as such.
3255 * @param refmask mask of fields to unreference; the mask is bitwise
3256 * anded with the reference marking of pic
3257 * @return non-zero if pic becomes entirely unreferenced (except possibly
3258 * for display purposes) zero if one of the fields remains in
3261 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3263 if (pic->reference &= refmask) {
3266 if(pic == h->delayed_output_pic)
3267 pic->reference=DELAYED_PIC_REF;
3269 for(i = 0; h->delayed_pic[i]; i++)
3270 if(pic == h->delayed_pic[i]){
3271 pic->reference=DELAYED_PIC_REF;
3280 * instantaneous decoder refresh.
3282 static void idr(H264Context *h){
3285 for(i=0; i<16; i++){
3286 if (h->long_ref[i] != NULL) {
3287 unreference_pic(h, h->long_ref[i], 0);
3288 h->long_ref[i]= NULL;
3291 h->long_ref_count=0;
3293 for(i=0; i<h->short_ref_count; i++){
3294 unreference_pic(h, h->short_ref[i], 0);
3295 h->short_ref[i]= NULL;
3297 h->short_ref_count=0;
3300 /* forget old pics after a seek */
3301 static void flush_dpb(AVCodecContext *avctx){
3302 H264Context *h= avctx->priv_data;
3304 for(i=0; i<16; i++) {
3305 if(h->delayed_pic[i])
3306 h->delayed_pic[i]->reference= 0;
3307 h->delayed_pic[i]= NULL;
3309 if(h->delayed_output_pic)
3310 h->delayed_output_pic->reference= 0;
3311 h->delayed_output_pic= NULL;
3313 if(h->s.current_picture_ptr)
3314 h->s.current_picture_ptr->reference= 0;
3315 h->s.first_field= 0;
3316 ff_mpeg_flush(avctx);
3320 * Find a Picture in the short term reference list by frame number.
3321 * @param frame_num frame number to search for
3322 * @param idx the index into h->short_ref where returned picture is found
3323 * undefined if no picture found.
3324 * @return pointer to the found picture, or NULL if no pic with the provided
3325 * frame number is found
3327 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3328 MpegEncContext * const s = &h->s;
3331 for(i=0; i<h->short_ref_count; i++){
3332 Picture *pic= h->short_ref[i];
3333 if(s->avctx->debug&FF_DEBUG_MMCO)
3334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3335 if(pic->frame_num == frame_num) {
3344 * Remove a picture from the short term reference list by its index in
3345 * that list. This does no checking on the provided index; it is assumed
3346 * to be valid. Other list entries are shifted down.
3347 * @param i index into h->short_ref of picture to remove.
3349 static void remove_short_at_index(H264Context *h, int i){
3350 assert(i > 0 && i < h->short_ref_count);
3351 h->short_ref[i]= NULL;
3352 if (--h->short_ref_count)
3353 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3358 * @return the removed picture or NULL if an error occurs
3360 static Picture * remove_short(H264Context *h, int frame_num){
3361 MpegEncContext * const s = &h->s;
3365 if(s->avctx->debug&FF_DEBUG_MMCO)
3366 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3368 pic = find_short(h, frame_num, &i);
3370 remove_short_at_index(h, i);
3376 * Remove a picture from the long term reference list by its index in
3377 * that list. This does no checking on the provided index; it is assumed
3378 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3379 * @param i index into h->long_ref of picture to remove.
3381 static void remove_long_at_index(H264Context *h, int i){
3382 h->long_ref[i]= NULL;
3383 h->long_ref_count--;
3388 * @return the removed picture or NULL if an error occurs
3390 static Picture * remove_long(H264Context *h, int i){
3393 pic= h->long_ref[i];
3395 remove_long_at_index(h, i);
3401 * print short term list
3403 static void print_short_term(H264Context *h) {
3405 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3406 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3407 for(i=0; i<h->short_ref_count; i++){
3408 Picture *pic= h->short_ref[i];
3409 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3415 * print long term list
3417 static void print_long_term(H264Context *h) {
3419 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3420 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3421 for(i = 0; i < 16; i++){
3422 Picture *pic= h->long_ref[i];
3424 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3431 * Executes the reference picture marking (memory management control operations).
3433 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3434 MpegEncContext * const s = &h->s;
3436 int current_ref_assigned=0;
3439 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3440 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3442 for(i=0; i<mmco_count; i++){
3443 int structure, frame_num, unref_pic;
3444 if(s->avctx->debug&FF_DEBUG_MMCO)
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3447 switch(mmco[i].opcode){
3448 case MMCO_SHORT2UNUSED:
3449 if(s->avctx->debug&FF_DEBUG_MMCO)
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3451 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3452 pic = find_short(h, frame_num, &j);
3454 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3455 remove_short_at_index(h, j);
3456 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3457 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
3459 case MMCO_SHORT2LONG:
3460 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3461 h->long_ref[mmco[i].long_arg]->frame_num ==
3462 mmco[i].short_pic_num / 2) {
3463 /* do nothing, we've already moved this field pair. */
3465 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3467 pic= remove_long(h, mmco[i].long_arg);
3468 if(pic) unreference_pic(h, pic, 0);
3470 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3471 if (h->long_ref[ mmco[i].long_arg ]){
3472 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3473 h->long_ref_count++;
3477 case MMCO_LONG2UNUSED:
3478 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3479 pic = h->long_ref[j];
3481 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3482 remove_long_at_index(h, j);
3483 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3484 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3488 if (FIELD_PICTURE && !s->first_field) {
3489 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3490 /* Just mark second field as referenced */
3492 } else if (s->current_picture_ptr->reference) {
3493 /* First field in pair is in short term list or
3494 * at a different long term index.
3495 * This is not allowed; see 7.4.3, notes 2 and 3.
3496 * Report the problem and keep the pair where it is,
3497 * and mark this field valid.
3499 av_log(h->s.avctx, AV_LOG_ERROR,
3500 "illegal long term reference assignment for second "
3501 "field in complementary field pair (first field is "
3502 "short term or has non-matching long index)\n");
3508 pic= remove_long(h, mmco[i].long_arg);
3509 if(pic) unreference_pic(h, pic, 0);
3511 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3512 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3513 h->long_ref_count++;
3516 s->current_picture_ptr->reference |= s->picture_structure;
3517 current_ref_assigned=1;
3519 case MMCO_SET_MAX_LONG:
3520 assert(mmco[i].long_arg <= 16);
3521 // just remove the long term which index is greater than new max
3522 for(j = mmco[i].long_arg; j<16; j++){
3523 pic = remove_long(h, j);
3524 if (pic) unreference_pic(h, pic, 0);
3528 while(h->short_ref_count){
3529 pic= remove_short(h, h->short_ref[0]->frame_num);
3530 if(pic) unreference_pic(h, pic, 0);
3532 for(j = 0; j < 16; j++) {
3533 pic= remove_long(h, j);
3534 if(pic) unreference_pic(h, pic, 0);
3541 if (!current_ref_assigned && FIELD_PICTURE &&
3542 !s->first_field && s->current_picture_ptr->reference) {
3544 /* Second field of complementary field pair; the first field of
3545 * which is already referenced. If short referenced, it
3546 * should be first entry in short_ref. If not, it must exist
3547 * in long_ref; trying to put it on the short list here is an
3548 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3550 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3551 /* Just mark the second field valid */
3552 s->current_picture_ptr->reference = PICT_FRAME;
3553 } else if (s->current_picture_ptr->long_ref) {
3554 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3555 "assignment for second field "
3556 "in complementary field pair "
3557 "(first field is long term)\n");
3560 * First field in reference, but not in any sensible place on our
3561 * reference lists. This shouldn't happen unless reference
3562 * handling somewhere else is wrong.
3566 current_ref_assigned = 1;
3569 if(!current_ref_assigned){
3570 pic= remove_short(h, s->current_picture_ptr->frame_num);
3572 unreference_pic(h, pic, 0);
3573 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3576 if(h->short_ref_count)
3577 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3579 h->short_ref[0]= s->current_picture_ptr;
3580 h->short_ref[0]->long_ref=0;
3581 h->short_ref_count++;
3582 s->current_picture_ptr->reference |= s->picture_structure;
3585 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3587 /* We have too many reference frames, probably due to corrupted
3588 * stream. Need to discard one frame. Prevents overrun of the
3589 * short_ref and long_ref buffers.
3591 av_log(h->s.avctx, AV_LOG_ERROR,
3592 "number of reference frames exceeds max (probably "
3593 "corrupt input), discarding one\n");
3595 if (h->long_ref_count) {
3596 for (i = 0; i < 16; ++i)
3601 pic = h->long_ref[i];
3602 remove_long_at_index(h, i);
3604 pic = h->short_ref[h->short_ref_count - 1];
3605 remove_short_at_index(h, h->short_ref_count - 1);
3607 unreference_pic(h, pic, 0);
3610 print_short_term(h);
// Parse the reference picture marking syntax of a slice header from gb and
// store the resulting memory management control operations in h->mmco[].
// NOTE(review): the statements that record how many operations were stored
// are not visible in this listing.
3615 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3616 MpegEncContext * const s = &h->s;
// IDR slices carry two flag bits instead of an operation list.
3619 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// Both flags are stored minus one, i.e. -1 when the bit is clear, 0 when set.
3620 s->broken_link= get_bits1(gb) -1;
3621 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3622 if(h->mmco[0].long_arg == -1)
// Second flag set: queue the current picture as long term reference, index 0.
3625 h->mmco[0].opcode= MMCO_LONG;
3629 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
// Explicit operations: at most MAX_MMCO_COUNT of them are read.
3630 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3631 MMCOOpcode opcode= get_ue_golomb(gb);
3633 h->mmco[i].opcode= opcode;
// Short term operations carry a picture number difference; it is subtracted
// (plus one) from the current picture number and the result is masked with
// max_pic_num - 1.
3634 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3635 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3636 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3637 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
// Long term argument: must be below 32, and below 16 unless the operation is
// MMCO_LONG2UNUSED in a field picture.
3641 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3642 unsigned int long_arg= get_ue_golomb(gb);
3643 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3644 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3647 h->mmco[i].long_arg= long_arg;
// Opcodes above MMCO_LONG are rejected; note that this check runs after the
// opcode's arguments have already been read.
3650 if(opcode > (unsigned)MMCO_LONG){
3651 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3654 if(opcode == MMCO_END)
// No explicit operations: when the lists are exactly full and this is not the
// second field of an already referenced pair, queue the removal of the last
// short term entry.
3659 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3661 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3662 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3663 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3664 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// Field pictures need one operation per field: picture numbers 2*frame_num
// and 2*frame_num + 1.
3666 if (FIELD_PICTURE) {
3667 h->mmco[0].short_pic_num *= 2;
3668 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3669 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
// Compute the picture order count (POC) of the current picture according to
// h->sps.poc_type and store it in s->current_picture_ptr (field_poc[] and
// poc).
3680 static int init_poc(H264Context *h){
3681 MpegEncContext * const s = &h->s;
3682 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
// frame_num_offset accumulates frame_num wrap-arounds: it is reset on IDR
// slices and grows by max_frame_num whenever frame_num is lower than the
// previous one.
3685 if(h->nal_unit_type == NAL_IDR_SLICE){
3686 h->frame_num_offset= 0;
3688 if(h->frame_num < h->prev_frame_num)
3689 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3691 h->frame_num_offset= h->prev_frame_num_offset;
// POC type 0: poc_lsb comes from the slice header; the msb part is derived
// here by detecting wrap-arounds of the lsb.
3694 if(h->sps.poc_type==0){
3695 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3697 if(h->nal_unit_type == NAL_IDR_SLICE){
// A drop of at least half the lsb range means the lsb wrapped forwards; a
// rise of more than half the range means it wrapped backwards.
3702 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3703 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3704 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3705 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3707 h->poc_msb = h->prev_poc_msb;
3708 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
// For frames the bottom field POC is additionally offset by delta_poc_bottom.
3710 field_poc[1] = h->poc_msb + h->poc_lsb;
3711 if(s->picture_structure == PICT_FRAME)
3712 field_poc[1] += h->delta_poc_bottom;
// POC type 1: an expected POC is built from the frame number and the cyclic
// table sps.offset_for_ref_frame[], then corrected by the slice's delta_poc[].
3713 }else if(h->sps.poc_type==1){
3714 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3717 if(h->sps.poc_cycle_length != 0)
3718 abs_frame_num = h->frame_num_offset + h->frame_num;
3722 if(h->nal_ref_idc==0 && abs_frame_num > 0)
// Sum of all offsets of one full cycle.
3725 expected_delta_per_poc_cycle = 0;
3726 for(i=0; i < h->sps.poc_cycle_length; i++)
3727 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Completed cycles contribute the full cycle sum; the partial cycle adds its
// offsets up to and including the current position.
3729 if(abs_frame_num > 0){
3730 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3731 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3733 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3734 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3735 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
// Non-reference pictures get an extra fixed offset.
3739 if(h->nal_ref_idc == 0)
3740 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3742 field_poc[0] = expectedpoc + h->delta_poc[0];
3743 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3745 if(s->picture_structure == PICT_FRAME)
3746 field_poc[1] += h->delta_poc[1];
// Remaining POC type: derived from the frame number alone, twice the absolute
// frame number for reference pictures and one less for non-reference ones.
3749 if(h->nal_unit_type == NAL_IDR_SLICE){
3752 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3753 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
// Store the POC of each field that the current picture structure covers.
3759 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3760 s->current_picture_ptr->field_poc[0]= field_poc[0];
3761 s->current_picture_ptr->poc = field_poc[0];
3763 if(s->picture_structure != PICT_TOP_FIELD) {
3764 s->current_picture_ptr->field_poc[1]= field_poc[1];
3765 s->current_picture_ptr->poc = field_poc[1];
// For frames and for the second field of a pair, the picture POC is the
// smaller of the two field POCs.
3767 if(!FIELD_PICTURE || !s->first_field) {
3768 Picture *cur = s->current_picture_ptr;
3769 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3777 * initialize scan tables
3779 static void init_scan_tables(H264Context *h){
3780 MpegEncContext * const s = &h->s;
3782 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3783 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3784 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3786 for(i=0; i<16; i++){
3787 #define T(x) (x>>2) | ((x<<2) & 0xF)
3788 h->zigzag_scan[i] = T(zigzag_scan[i]);
3789 h-> field_scan[i] = T( field_scan[i]);
3793 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3794 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3795 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3796 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3799 for(i=0; i<64; i++){
3800 #define T(x) (x>>3) | ((x&7)<<3)
3801 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3802 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3803 h->field_scan8x8[i] = T(field_scan8x8[i]);
3804 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3808 if(h->sps.transform_bypass){ //FIXME same ugly
3809 h->zigzag_scan_q0 = zigzag_scan;
3810 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3811 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3812 h->field_scan_q0 = field_scan;
3813 h->field_scan8x8_q0 = field_scan8x8;
3814 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3816 h->zigzag_scan_q0 = h->zigzag_scan;
3817 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3818 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3819 h->field_scan_q0 = h->field_scan;
3820 h->field_scan8x8_q0 = h->field_scan8x8;
3821 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3826 * Replicates H264 "master" context to thread contexts.
3828 static void clone_slice(H264Context *dst, H264Context *src)
3830 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3831 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3832 dst->s.current_picture = src->s.current_picture;
3833 dst->s.linesize = src->s.linesize;
3834 dst->s.uvlinesize = src->s.uvlinesize;
3835 dst->s.first_field = src->s.first_field;
3837 dst->prev_poc_msb = src->prev_poc_msb;
3838 dst->prev_poc_lsb = src->prev_poc_lsb;
3839 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3840 dst->prev_frame_num = src->prev_frame_num;
3841 dst->short_ref_count = src->short_ref_count;
3843 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3844 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3845 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3846 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3848 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3849 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3853 * decodes a slice header.
3854 * this will also call MPV_common_init() and frame_start() as needed
3856 * @param h h264context
3857 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3859 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3861 static int decode_slice_header(H264Context *h, H264Context *h0){
3862 MpegEncContext * const s = &h->s;
3863 MpegEncContext * const s0 = &h0->s;
3864 unsigned int first_mb_in_slice;
3865 unsigned int pps_id;
3866 int num_ref_idx_active_override_flag;
3867 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3868 unsigned int slice_type, tmp, i;
3869 int default_ref_list_done = 0;
3870 int last_pic_structure;
3872 s->dropable= h->nal_ref_idc == 0;
3874 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3875 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3876 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3878 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3879 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3882 first_mb_in_slice= get_ue_golomb(&s->gb);
3884 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3885 h0->current_slice = 0;
3886 if (!s0->first_field)
3887 s->current_picture_ptr= NULL;
3890 slice_type= get_ue_golomb(&s->gb);
3892 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3897 h->slice_type_fixed=1;
3899 h->slice_type_fixed=0;
3901 slice_type= slice_type_map[ slice_type ];
3902 if (slice_type == I_TYPE
3903 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3904 default_ref_list_done = 1;
3906 h->slice_type= slice_type;
3908 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3909 if (s->pict_type == B_TYPE && s0->last_picture_ptr == NULL) {
3910 av_log(h->s.avctx, AV_LOG_ERROR,
3911 "B picture before any references, skipping\n");
3915 pps_id= get_ue_golomb(&s->gb);
3916 if(pps_id>=MAX_PPS_COUNT){
3917 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3920 if(!h0->pps_buffers[pps_id]) {
3921 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3924 h->pps= *h0->pps_buffers[pps_id];
3926 if(!h0->sps_buffers[h->pps.sps_id]) {
3927 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3930 h->sps = *h0->sps_buffers[h->pps.sps_id];
3932 if(h == h0 && h->dequant_coeff_pps != pps_id){
3933 h->dequant_coeff_pps = pps_id;
3934 init_dequant_tables(h);
3937 s->mb_width= h->sps.mb_width;
3938 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3940 h->b_stride= s->mb_width*4;
3941 h->b8_stride= s->mb_width*2;
3943 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3944 if(h->sps.frame_mbs_only_flag)
3945 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3947 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3949 if (s->context_initialized
3950 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3952 return -1; // width / height changed during parallelized decoding
3956 if (!s->context_initialized) {
3958 return -1; // we cant (re-)initialize context during parallel decoding
3959 if (MPV_common_init(s) < 0)
3963 init_scan_tables(h);
3966 for(i = 1; i < s->avctx->thread_count; i++) {
3968 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3969 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3970 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3973 init_scan_tables(c);
3977 for(i = 0; i < s->avctx->thread_count; i++)
3978 if(context_init(h->thread_context[i]) < 0)
3981 s->avctx->width = s->width;
3982 s->avctx->height = s->height;
3983 s->avctx->sample_aspect_ratio= h->sps.sar;
3984 if(!s->avctx->sample_aspect_ratio.den)
3985 s->avctx->sample_aspect_ratio.den = 1;
3987 if(h->sps.timing_info_present_flag){
3988 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3989 if(h->x264_build > 0 && h->x264_build < 44)
3990 s->avctx->time_base.den *= 2;
3991 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3992 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3996 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3999 h->mb_aff_frame = 0;
4000 last_pic_structure = s0->picture_structure;
4001 if(h->sps.frame_mbs_only_flag){
4002 s->picture_structure= PICT_FRAME;
4004 if(get_bits1(&s->gb)) { //field_pic_flag
4005 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4007 s->picture_structure= PICT_FRAME;
4008 h->mb_aff_frame = h->sps.mb_aff;
4012 if(h0->current_slice == 0){
4013 /* See if we have a decoded first field looking for a pair... */
4014 if (s0->first_field) {
4015 assert(s0->current_picture_ptr);
4016 assert(s0->current_picture_ptr->data[0]);
4017 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4019 /* figure out if we have a complementary field pair */
4020 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4022 * Previous field is unmatched. Don't display it, but let it
4023 * remain for reference if marked as such.
4025 s0->current_picture_ptr = NULL;
4026 s0->first_field = FIELD_PICTURE;
4029 if (h->nal_ref_idc &&
4030 s0->current_picture_ptr->reference &&
4031 s0->current_picture_ptr->frame_num != h->frame_num) {
4033 * This and previous field were reference, but had
4034 * different frame_nums. Consider this field first in
4035 * pair. Throw away previous field except for reference
4038 s0->first_field = 1;
4039 s0->current_picture_ptr = NULL;
4042 /* Second field in complementary pair */
4043 s0->first_field = 0;
4048 /* Frame or first field in a potentially complementary pair */
4049 assert(!s0->current_picture_ptr);
4050 s0->first_field = FIELD_PICTURE;
4053 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4054 s0->first_field = 0;
4061 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4063 assert(s->mb_num == s->mb_width * s->mb_height);
4064 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4065 first_mb_in_slice >= s->mb_num){
4066 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4069 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4070 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4071 if (s->picture_structure == PICT_BOTTOM_FIELD)
4072 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4073 assert(s->mb_y < s->mb_height);
4075 if(s->picture_structure==PICT_FRAME){
4076 h->curr_pic_num= h->frame_num;
4077 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4079 h->curr_pic_num= 2*h->frame_num + 1;
4080 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4083 if(h->nal_unit_type == NAL_IDR_SLICE){
4084 get_ue_golomb(&s->gb); /* idr_pic_id */
4087 if(h->sps.poc_type==0){
4088 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4090 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4091 h->delta_poc_bottom= get_se_golomb(&s->gb);
4095 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4096 h->delta_poc[0]= get_se_golomb(&s->gb);
4098 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4099 h->delta_poc[1]= get_se_golomb(&s->gb);
4104 if(h->pps.redundant_pic_cnt_present){
4105 h->redundant_pic_count= get_ue_golomb(&s->gb);
4108 //set defaults, might be overriden a few line later
4109 h->ref_count[0]= h->pps.ref_count[0];
4110 h->ref_count[1]= h->pps.ref_count[1];
4112 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4113 if(h->slice_type == B_TYPE){
4114 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4116 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4118 if(num_ref_idx_active_override_flag){
4119 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4120 if(h->slice_type==B_TYPE)
4121 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4123 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4124 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4125 h->ref_count[0]= h->ref_count[1]= 1;
4129 if(h->slice_type == B_TYPE)
4136 if(!default_ref_list_done){
4137 fill_default_ref_list(h);
4140 if(decode_ref_pic_list_reordering(h) < 0)
4143 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4144 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4145 pred_weight_table(h);
4146 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4147 implicit_weight_table(h);
4152 decode_ref_pic_marking(h0, &s->gb);
4155 fill_mbaff_ref_list(h);
4157 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4158 tmp = get_ue_golomb(&s->gb);
4160 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4163 h->cabac_init_idc= tmp;
4166 h->last_qscale_diff = 0;
4167 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4169 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4173 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4174 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4175 //FIXME qscale / qp ... stuff
4176 if(h->slice_type == SP_TYPE){
4177 get_bits1(&s->gb); /* sp_for_switch_flag */
4179 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4180 get_se_golomb(&s->gb); /* slice_qs_delta */
4183 h->deblocking_filter = 1;
4184 h->slice_alpha_c0_offset = 0;
4185 h->slice_beta_offset = 0;
4186 if( h->pps.deblocking_filter_parameters_present ) {
4187 tmp= get_ue_golomb(&s->gb);
4189 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4192 h->deblocking_filter= tmp;
4193 if(h->deblocking_filter < 2)
4194 h->deblocking_filter^= 1; // 1<->0
4196 if( h->deblocking_filter ) {
4197 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4198 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4202 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4203 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4204 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4205 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4206 h->deblocking_filter= 0;
4208 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4209 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4210 /* Cheat slightly for speed:
4211 Do not bother to deblock across slices. */
4212 h->deblocking_filter = 2;
4214 h0->max_contexts = 1;
4215 if(!h0->single_decode_warning) {
4216 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4217 h0->single_decode_warning = 1;
4220 return 1; // deblocking switched inside frame
4225 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4226 slice_group_change_cycle= get_bits(&s->gb, ?);
4229 h0->last_slice_type = slice_type;
4230 h->slice_num = ++h0->current_slice;
4232 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4233 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4235 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4236 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4238 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4240 av_get_pict_type_char(h->slice_type),
4241 pps_id, h->frame_num,
4242 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4243 h->ref_count[0], h->ref_count[1],
4245 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4247 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
/* Reads the level prefix of a CAVLC coefficient level from gb.
 * The reader cache is peeked, a bit count (log) is derived from the position
 * of the highest set bit of the cache word, and exactly log bits are consumed.
 * NOTE(review): the returned value is presumably log-1, the quantity the debug
 * message labels lpr -- confirm. */
4257 static inline int get_level_prefix(GetBitContext *gb){
4261 OPEN_READER(re, gb);
4262 UPDATE_CACHE(re, gb);
4263 buf=GET_CACHE(re, gb);
// Assuming av_log2 yields the index of the highest set bit, log counts the
// leading zero bits of the cache word plus the terminating one bit.
4265 log= 32 - av_log2(buf);
// Debug output of the consumed bits; presumably only built in a trace configuration -- confirm.
4267 print_bin(buf>>(32-log), log);
4268 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Consume the prefix bits and write the reader state back to gb.
4271 LAST_SKIP_BITS(re, gb, log);
4272 CLOSE_READER(re, gb);
/* Checks the sub-macroblock types of the current macroblock to decide whether
 * the 8x8 transform may be used.
 * The visible condition matches a sub-macroblock for which IS_SUB_8X8() is
 * false, or one that is direct-predicted while sps.direct_8x8_inference_flag
 * is not set.
 * NOTE(review): presumably i runs over the four sub-macroblocks and a match
 * makes the function return 0 -- confirm. */
4277 static inline int get_dct8x8_allowed(H264Context *h){
4280 if(!IS_SUB_8X8(h->sub_mb_type[i])
4281 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4288 * decodes a residual block.
4289 * @param n block index
4290 * @param scantable scantable
4291 * @param max_coeff number of coefficients in the block
4292 * @return <0 if an error occurred
/* Decodes one CAVLC-coded residual block from gb into block.
 *   n          block index; CHROMA_DC_BLOCK_INDEX and LUMA_DC_BLOCK_INDEX select the DC paths
 *   scantable  maps a coefficient number to its position inside block
 *   qmul       dequantization multipliers, applied as (level * qmul[j] + 32) >> 6 on the scaled path
 *   max_coeff  largest number of coefficients the block may hold
 * Parsing order: coeff_token, trailing-one signs, remaining levels, total
 * zeros, then one run_before per coefficient while the levels are stored.
 * An error is logged for a total_coeff larger than max_coeff, for a level
 * prefix that is too large and for a negative zero count. */
4294 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4295 MpegEncContext * const s = &h->s;
// Maps a predicted non-zero count (0..16) to one of the four coeff_token VLC tables.
4296 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4298 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4300 //FIXME put trailing_ones into the context
// coeff_token packs total_coeff in its upper bits and trailing_ones in its low two bits.
// Chroma DC has a dedicated table; the other paths pick a table from the predicted count.
4302 if(n == CHROMA_DC_BLOCK_INDEX){
4303 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4304 total_coeff= coeff_token>>2;
4306 if(n == LUMA_DC_BLOCK_INDEX){
// total_coeff temporarily holds the prediction used to select the table.
4307 total_coeff= pred_non_zero_count(h, 0);
4308 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4309 total_coeff= coeff_token>>2;
4311 total_coeff= pred_non_zero_count(h, n);
4312 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4313 total_coeff= coeff_token>>2;
// Only this path records the count in the non-zero count cache.
4314 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4318 //FIXME set last_non_zero?
// Reject a coefficient count that does not fit into the block.
4322 if(total_coeff > (unsigned)max_coeff) {
4323 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4327 trailing_ones= coeff_token&3;
4328 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4329 assert(total_coeff<=16);
// Each trailing one carries only a sign bit: 0 gives +1, 1 gives -1.
4331 for(i=0; i<trailing_ones; i++){
4332 level[i]= 1 - 2*get_bits1(gb);
4336 int level_code, mask;
// Initial suffix_length is 1 only for blocks with more than 10 coefficients and fewer than 3 trailing ones.
4337 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4338 int prefix= get_level_prefix(gb);
4340 //first coefficient has suffix_length equal to 0 or 1
4341 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4343 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4345 level_code= (prefix<<suffix_length); //part
4346 }else if(prefix==14){
4348 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4350 level_code= prefix + get_bits(gb, 4); //part
4351 }else if(prefix==15){
// Escape code: a fixed 12 bit suffix follows.
4352 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4353 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4355 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4359 if(trailing_ones < 3) level_code += 2;
// Convert level_code to a signed level: even codes become positive, odd codes negative.
4364 mask= -(level_code&1);
4365 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4368 //remaining coefficients have suffix_length > 0
4369 for(;i<total_coeff;i++) {
4370 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4371 prefix = get_level_prefix(gb);
4373 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4374 }else if(prefix==15){
4375 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4377 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4380 mask= -(level_code&1);
4381 level[i]= (((2+level_code)>>1) ^ mask) - mask;
// NOTE(review): presumably suffix_length is incremented when the limit is exceeded -- confirm.
4382 if(level_code > suffix_limit[suffix_length])
// NOTE(review): a completely filled block presumably has zeros_left set to 0 without reading total_zeros -- confirm.
4387 if(total_coeff == max_coeff)
4390 if(n == CHROMA_DC_BLOCK_INDEX)
4391 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4393 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// Coefficients are placed starting at the highest used scan position and moving
// towards lower positions, each step skipping run_before zeros.
4396 coeff_num = zeros_left + total_coeff - 1;
4397 j = scantable[coeff_num];
// Unscaled path: levels are stored as decoded.
4399 block[j] = level[0];
4400 for(i=1;i<total_coeff;i++) {
4403 else if(zeros_left < 7){
4404 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4406 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4408 zeros_left -= run_before;
4409 coeff_num -= 1 + run_before;
4410 j= scantable[ coeff_num ];
// Scaled path: each level is multiplied by qmul at its block position and rounded by (x + 32) >> 6.
4415 block[j] = (level[0] * qmul[j] + 32)>>6;
4416 for(i=1;i<total_coeff;i++) {
4419 else if(zeros_left < 7){
4420 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4422 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4424 zeros_left -= run_before;
4425 coeff_num -= 1 + run_before;
4426 j= scantable[ coeff_num ];
4428 block[j]= (level[i] * qmul[j] + 32)>>6;
4433 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/* Infers the field decoding flag of the current macroblock from a neighbour
 * instead of reading it from the bitstream.
 * The left neighbour (mb_xy-1) is used when it belongs to the current slice,
 * otherwise the neighbour one row up (mb_xy - mb_stride) when that one
 * belongs to the current slice.
 * NOTE(review): the value used when neither neighbour qualifies is presumably
 * 0 -- confirm.
 * Writes both h->mb_mbaff and h->mb_field_decoding_flag. */
4440 static void predict_field_decoding_flag(H264Context *h){
4441 MpegEncContext * const s = &h->s;
4442 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4443 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4444 ? s->current_picture.mb_type[mb_xy-1]
4445 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4446 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// The flag is 1 exactly when the chosen neighbour type is interlaced.
4448 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4452 * decodes a P_SKIP or B_SKIP macroblock
/* Fills in the state of a skipped macroblock (P_SKIP or B_SKIP): no residual,
 * motion derived by prediction only.
 * B slices go through pred_direct_motion(); the other path uses
 * pred_pskip_motion() with reference index 0 on list 0.
 * Afterwards the motion data, mb_type, qscale and slice number are written to
 * the per-picture tables and h->prev_mb_skipped is set. */
4454 static void decode_mb_skip(H264Context *h){
4455 MpegEncContext * const s = &h->s;
4456 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// A skipped macroblock has no coefficients: clear its non-zero counts.
4459 memset(h->non_zero_count[mb_xy], 0, 16);
4460 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
// NOTE(review): presumably applied only to field macroblocks -- confirm.
4463 mb_type|= MB_TYPE_INTERLACED;
4465 if( h->slice_type == B_TYPE )
4467 // just for fill_caches. pred_direct_motion will set the real mb_type
4468 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4470 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4471 pred_direct_motion(h, &mb_type);
// Set the skip flag again after pred_direct_motion() has written mb_type.
4472 mb_type|= MB_TYPE_SKIP;
// Non-B path: one 16x16 partition predicted from list 0.
4477 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4479 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4480 pred_pskip_motion(h, &mx, &my);
// Whole macroblock: reference index 0 and the single predicted vector.
4481 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4482 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4485 write_back_motion(h, mb_type);
4486 s->current_picture.mb_type[mb_xy]= mb_type;
4487 s->current_picture.qscale_table[mb_xy]= s->qscale;
4488 h->slice_table[ mb_xy ]= h->slice_num;
4489 h->prev_mb_skipped= 1;
4493 * decodes a macroblock
4494 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Decodes one macroblock of a CAVLC-coded slice.
 * Visible flow: handle the skip run in non-intra slices, read mb_type and map
 * it through the B, P or I type tables, then parse either raw PCM samples,
 * intra prediction modes, or reference indices and motion vectors according
 * to the partition shape. After that come the coded block pattern, the
 * optional 8x8 transform flag, the QP delta and the residual blocks.
 * Errors are reported with av_log(); the visible residual error path returns -1. */
4496 static int decode_mb_cavlc(H264Context *h){
4497 MpegEncContext * const s = &h->s;
4498 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4499 int partition_count;
4500 unsigned int mb_type, cbp;
4501 int dct8x8_allowed= h->pps.transform_8x8_mode;
4503 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?
4505 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4506 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4508 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4509 if(s->mb_skip_run==-1)
4510 s->mb_skip_run= get_ue_golomb(&s->gb);
4512 if (s->mb_skip_run--) {
4513 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4514 if(s->mb_skip_run==0)
4515 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4517 predict_field_decoding_flag(h);
4524 if( (s->mb_y&1) == 0 )
4525 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4527 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4529 h->prev_mb_skipped= 0;
4531 mb_type= get_ue_golomb(&s->gb);
4532 if(h->slice_type == B_TYPE){
4534 partition_count= b_mb_type_info[mb_type].partition_count;
4535 mb_type= b_mb_type_info[mb_type].type;
4538 goto decode_intra_mb;
4540 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4542 partition_count= p_mb_type_info[mb_type].partition_count;
4543 mb_type= p_mb_type_info[mb_type].type;
4546 goto decode_intra_mb;
4549 assert(h->slice_type == I_TYPE);
4552 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra types carry their coded block pattern and 16x16 prediction mode in the type table.
4556 cbp= i_mb_type_info[mb_type].cbp;
4557 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4558 mb_type= i_mb_type_info[mb_type].type;
4562 mb_type |= MB_TYPE_INTERLACED;
4564 h->slice_table[ mb_xy ]= h->slice_num;
// I_PCM: samples are stored raw in the bitstream, byte aligned, 8 bits each.
4566 if(IS_INTRA_PCM(mb_type)){
4569 // We assume these blocks are very rare so we do not optimize it.
4570 align_get_bits(&s->gb);
4572 // The pixels are stored in the same order as levels in h->mb array.
4573 for(y=0; y<16; y++){
4574 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4575 for(x=0; x<16; x++){
4576 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4577 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
// First chroma plane starts at offset 256 in h->mb.
4581 const int index= 256 + 4*(y&3) + 32*(y>>2);
4583 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4584 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
// Second chroma plane follows 64 entries later.
4588 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4590 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4591 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4595 // In deblocking, the quantizer is 0
4596 s->current_picture.qscale_table[mb_xy]= 0;
4597 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4598 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4599 // All coeffs are present
4600 memset(h->non_zero_count[mb_xy], 16, 16);
4602 s->current_picture.mb_type[mb_xy]= mb_type;
// Reference counts are doubled here and halved again at the end of the function.
// NOTE(review): presumably done only for field macroblocks of MBAFF frames -- confirm.
4607 h->ref_count[0] <<= 1;
4608 h->ref_count[1] <<= 1;
4611 fill_caches(h, mb_type, 0);
4614 if(IS_INTRA(mb_type)){
4616 // init_top_left_availability(h);
4617 if(IS_INTRA4x4(mb_type)){
// With the 8x8 transform allowed, one bit selects it for this intra macroblock.
4620 if(dct8x8_allowed && get_bits1(&s->gb)){
4621 mb_type |= MB_TYPE_8x8DCT;
4625 // fill_intra4x4_pred_table(h);
4626 for(i=0; i<16; i+=di){
4627 int mode= pred_intra_mode(h, i);
// A zero bit means the predicted mode is not used; the 3 bit rem_mode then
// selects among the remaining modes by skipping over the predicted one.
4629 if(!get_bits1(&s->gb)){
4630 const int rem_mode= get_bits(&s->gb, 3);
4631 mode = rem_mode + (rem_mode >= mode);
4635 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4637 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4639 write_back_intra_pred_mode(h);
4640 if( check_intra4x4_pred_mode(h) < 0)
4643 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4644 if(h->intra16x16_pred_mode < 0)
// Chroma prediction mode, validated by check_intra_pred_mode().
4648 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4651 h->chroma_pred_mode= pred_mode;
// Four 8x8 partitions, each with its own sub-macroblock type.
4652 }else if(partition_count==4){
4653 int i, j, sub_partition_count[4], list, ref[2][4];
4655 if(h->slice_type == B_TYPE){
4657 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4658 if(h->sub_mb_type[i] >=13){
4659 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4662 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4663 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct sub-macroblock requires direct prediction for the macroblock.
4665 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4666 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4667 pred_direct_motion(h, &mb_type);
4668 h->ref_cache[0][scan8[4]] =
4669 h->ref_cache[1][scan8[4]] =
4670 h->ref_cache[0][scan8[12]] =
4671 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4674 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4676 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4677 if(h->sub_mb_type[i] >=4){
4678 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4681 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4682 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices for all sub-macroblocks are read before any motion vector.
4686 for(list=0; list<h->list_count; list++){
4687 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4689 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4690 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4691 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4693 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4705 dct8x8_allowed = get_dct8x8_allowed(h);
4707 for(list=0; list<h->list_count; list++){
4709 if(IS_DIRECT(h->sub_mb_type[i])) {
4710 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
// Spread the reference index over the four 4x4 cache entries of this 8x8 block.
4713 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4714 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4716 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4717 const int sub_mb_type= h->sub_mb_type[i];
4718 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4719 for(j=0; j<sub_partition_count[i]; j++){
4721 const int index= 4*i + block_width*j;
4722 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
// Final vector = prediction plus the coded difference.
4723 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4724 mx += get_se_golomb(&s->gb);
4725 my += get_se_golomb(&s->gb);
4726 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
// Replicate the vector over the cache entries covered by this sub-partition.
4728 if(IS_SUB_8X8(sub_mb_type)){
4730 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4732 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4733 }else if(IS_SUB_8X4(sub_mb_type)){
4734 mv_cache[ 1 ][0]= mx;
4735 mv_cache[ 1 ][1]= my;
4736 }else if(IS_SUB_4X8(sub_mb_type)){
4737 mv_cache[ 8 ][0]= mx;
4738 mv_cache[ 8 ][1]= my;
4740 mv_cache[ 0 ][0]= mx;
4741 mv_cache[ 0 ][1]= my;
4744 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4750 }else if(IS_DIRECT(mb_type)){
4751 pred_direct_motion(h, &mb_type);
// Direct macroblocks may use the 8x8 transform only with direct_8x8_inference_flag set.
4752 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4754 int list, mx, my, i;
4755 //FIXME we should set ref_idx_l? to 0 if we use that later ...
// For each partition shape: first all reference indices, then all motion vectors.
4756 if(IS_16X16(mb_type)){
4757 for(list=0; list<h->list_count; list++){
4759 if(IS_DIR(mb_type, 0, list)){
4760 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4761 if(val >= h->ref_count[list]){
4762 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4766 val= LIST_NOT_USED&0xFF;
4767 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4769 for(list=0; list<h->list_count; list++){
4771 if(IS_DIR(mb_type, 0, list)){
4772 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4773 mx += get_se_golomb(&s->gb);
4774 my += get_se_golomb(&s->gb);
4775 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4777 val= pack16to32(mx,my);
4780 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4783 else if(IS_16X8(mb_type)){
4784 for(list=0; list<h->list_count; list++){
4787 if(IS_DIR(mb_type, i, list)){
4788 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4789 if(val >= h->ref_count[list]){
4790 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4794 val= LIST_NOT_USED&0xFF;
// Partition i covers two cache rows starting at row 2*i (offset 16*i).
4795 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4798 for(list=0; list<h->list_count; list++){
4801 if(IS_DIR(mb_type, i, list)){
4802 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4803 mx += get_se_golomb(&s->gb);
4804 my += get_se_golomb(&s->gb);
4805 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4807 val= pack16to32(mx,my);
4810 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4814 assert(IS_8X16(mb_type));
4815 for(list=0; list<h->list_count; list++){
4818 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4819 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4820 if(val >= h->ref_count[list]){
4821 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4825 val= LIST_NOT_USED&0xFF;
// Partition i covers two cache columns starting at column 2*i.
4826 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4829 for(list=0; list<h->list_count; list++){
4832 if(IS_DIR(mb_type, i, list)){
4833 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4834 mx += get_se_golomb(&s->gb);
4835 my += get_se_golomb(&s->gb);
4836 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4838 val= pack16to32(mx,my);
4841 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4847 if(IS_INTER(mb_type))
4848 write_back_motion(h, mb_type);
// Intra 16x16 already got its cbp from the type table; all other types read it here
// and map the Golomb code through the intra 4x4 or inter table.
4850 if(!IS_INTRA16x16(mb_type)){
4851 cbp= get_ue_golomb(&s->gb);
4853 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4857 if(IS_INTRA4x4(mb_type))
4858 cbp= golomb_to_intra4x4_cbp[cbp];
4860 cbp= golomb_to_inter_cbp[cbp];
// Non-intra macroblocks read the 8x8 transform flag only when some luma block is coded.
4864 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4865 if(get_bits1(&s->gb))
4866 mb_type |= MB_TYPE_8x8DCT;
4868 s->current_picture.mb_type[mb_xy]= mb_type;
4870 if(cbp || IS_INTRA16x16(mb_type)){
4871 int i8x8, i4x4, chroma_idx;
4873 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4874 const uint8_t *scan, *scan8x8, *dc_scan;
4876 // fill_non_zero_count_cache(h);
// Scan tables: field or zigzag order, with separate variants used when qscale is 0.
4878 if(IS_INTERLACED(mb_type)){
4879 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4880 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4881 dc_scan= luma_dc_field_scan;
4883 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4884 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4885 dc_scan= luma_dc_zigzag_scan;
4888 dquant= get_se_golomb(&s->gb);
4890 if( dquant > 25 || dquant < -26 ){
4891 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
// Apply the QP delta and wrap the result back into the range 0..51.
4895 s->qscale += dquant;
4896 if(((unsigned)s->qscale) > 51){
4897 if(s->qscale<0) s->qscale+= 52;
4898 else s->qscale-= 52;
4901 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4902 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4903 if(IS_INTRA16x16(mb_type)){
// Intra 16x16: one luma DC block with 16 coefficients comes first.
4904 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4905 return -1; //FIXME continue if partitioned and other return -1 too
4908 assert((cbp&15) == 0 || (cbp&15) == 15);
4911 for(i8x8=0; i8x8<4; i8x8++){
4912 for(i4x4=0; i4x4<4; i4x4++){
4913 const int index= i4x4 + 4*i8x8;
// AC-only blocks: scan starts one entry later and holds at most 15 coefficients.
4914 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4920 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4923 for(i8x8=0; i8x8<4; i8x8++){
4924 if(cbp & (1<<i8x8)){
4925 if(IS_8x8DCT(mb_type)){
// With the 8x8 transform all four calls write into the same 64 entry buffer,
// each using its own 16 entry slice of scan8x8.
4926 DCTELEM *buf = &h->mb[64*i8x8];
4928 for(i4x4=0; i4x4<4; i4x4++){
4929 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4930 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
// Fold the four 4x4 counts of this 8x8 block into its first cache entry.
4933 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4934 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4936 for(i4x4=0; i4x4<4; i4x4++){
4937 const int index= i4x4 + 4*i8x8;
4939 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
// 8x8 block not coded: its four non-zero counts are cleared.
4945 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4946 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC: one 4 coefficient block per chroma plane, stored without dequantization (qmul is NULL).
4952 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4953 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
// Chroma AC: four 15 coefficient blocks per plane, dequantized with the per-plane chroma QP.
4959 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4960 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4961 for(i4x4=0; i4x4<4; i4x4++){
4962 const int index= 16 + 4*chroma_idx + i4x4;
4963 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
// No chroma AC coded: clear the chroma non-zero counts.
4969 uint8_t * const nnz= &h->non_zero_count_cache[0];
4970 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4971 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// Nothing coded at all: clear luma and chroma non-zero counts.
4974 uint8_t * const nnz= &h->non_zero_count_cache[0];
4975 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4976 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4977 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4979 s->current_picture.qscale_table[mb_xy]= s->qscale;
4980 write_back_non_zero_count(h);
// Undo the doubling of the reference counts done earlier in this function.
4983 h->ref_count[0] >>= 1;
4984 h->ref_count[1] >>= 1;
/* Decodes the field decoding flag of a macroblock pair with CABAC.
 * The context index starts at 0 and depends on whether the pair to the left
 * (mba_xy) and the pair above (mbb_xy) belong to the current slice and are
 * interlaced.
 * NOTE(review): presumably each matching neighbour increments ctx -- confirm.
 * Returns the bit decoded with cabac_state[70 + ctx]. */
4990 static int decode_cabac_field_decoding_flag(H264Context *h) {
4991 MpegEncContext * const s = &h->s;
4992 const int mb_x = s->mb_x;
// Clearing the low bit addresses the top macroblock of the current pair.
4993 const int mb_y = s->mb_y & ~1;
4994 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
// Two rows up is the top macroblock of the pair above.
4995 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4997 unsigned int ctx = 0;
4999 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5002 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5006 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/* Decodes an intra macroblock type with CABAC.
 *   ctx_base     index of the first CABAC state used for this syntax element
 *   intra_slice  non-zero when called for an I slice; it also shifts the state
 *                indices used for the later bins
 * Returns 0 for I4x4, 25 for PCM, otherwise an I16x16 type built as 1 plus
 * the contributions of the cbp_luma, cbp_chroma and prediction mode bins.
 * NOTE(review): the neighbour-dependent first bin is presumably used only
 * when intra_slice is set, the state[0] variant otherwise -- confirm. */
5009 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5010 uint8_t *state= &h->cabac_state[ctx_base];
5014 MpegEncContext * const s = &h->s;
5015 const int mba_xy = h->left_mb_xy[0];
5016 const int mbb_xy = h->top_mb_xy;
// Context depends on the left and top neighbours being in this slice and not I4x4.
5018 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5020 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5022 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5023 return 0; /* I4x4 */
5026 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5027 return 0; /* I4x4 */
// The terminate bin distinguishes PCM from I16x16.
5030 if( get_cabac_terminate( &h->cabac ) )
5031 return 25; /* PCM */
5033 mb_type = 1; /* I16x16 */
5034 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5035 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5036 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
// Two final bins add 0..3 to the type.
5037 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5038 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/* Decodes the macroblock type with CABAC according to the slice type.
 * I slices delegate to decode_cabac_intra_mb_type(h, 3, 1).
 * P slices decode a short binary tree for the inter types, or an intra type
 * offset by 5.
 * B slices decode B_Direct_16x16 (0), the single-list 16x16 types (1 or 2), or
 * a 4 to 5 bin code that maps to the remaining B types; an intra type is
 * offset by 23.
 * SI and SP slices are not handled (see the TODO below). */
5042 static int decode_cabac_mb_type( H264Context *h ) {
5043 MpegEncContext * const s = &h->s;
5045 if( h->slice_type == I_TYPE ) {
5046 return decode_cabac_intra_mb_type(h, 3, 1);
5047 } else if( h->slice_type == P_TYPE ) {
5048 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5050 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5051 /* P_L0_D16x16, P_8x8 */
// Yields 0 or 3.
5052 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5054 /* P_L0_D8x16, P_L0_D16x8 */
// Yields 2 or 1.
5055 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// Intra macroblock inside a P slice.
5058 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5060 } else if( h->slice_type == B_TYPE ) {
5061 const int mba_xy = h->left_mb_xy[0];
5062 const int mbb_xy = h->top_mb_xy;
// Context of the first bin depends on same-slice neighbours that are not direct.
5066 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5068 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5071 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5072 return 0; /* B_Direct_16x16 */
5074 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5075 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// Four more bins form a 4 bit code, most significant bit first.
5078 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5079 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5080 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5081 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
// NOTE(review): presumably taken for the small values of bits -- confirm.
5083 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5084 else if( bits == 13 ) {
5085 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5086 } else if( bits == 14 )
5087 return 11; /* B_L1_L0_8x16 */
5088 else if( bits == 15 )
5089 return 22; /* B_8x8 */
// Remaining codes take one extra bin.
5091 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5092 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5094 /* TODO SI/SP frames? */
/* Decodes the skip flag of the macroblock at (mb_x, mb_y) with CABAC.
 * The left (mba_xy) and top (mbb_xy) neighbour addresses are derived
 * differently for MBAFF frames and for plain frame or field pictures; the
 * context depends on those neighbours being in the current slice and not
 * skipped.
 * NOTE(review): presumably each such neighbour increments ctx and B slices
 * add a further offset -- confirm.
 * Returns the bit decoded with cabac_state[11 + ctx]. */
5099 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5100 MpegEncContext * const s = &h->s;
5104 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
// Address of the top macroblock of the current pair.
5105 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Use the bottom macroblock of the left pair when its field/frame mode matches.
5108 && h->slice_table[mba_xy] == h->slice_num
5109 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5110 mba_xy += s->mb_stride;
5112 mbb_xy = mb_xy - s->mb_stride;
// Step one more row up when the pair above is interlaced.
5114 && h->slice_table[mbb_xy] == h->slice_num
5115 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5116 mbb_xy -= s->mb_stride;
5118 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5120 int mb_xy = mb_x + mb_y*s->mb_stride;
// In field pictures the row above is two strides away.
5122 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5125 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5127 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5130 if( h->slice_type == B_TYPE )
5132 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/* Decodes an intra 4x4 prediction mode with CABAC.
 *   pred_mode  the mode predicted from the neighbours
 * The first bin (state 68) tells whether the predicted mode is used.
 * Otherwise a 3 bit value is read least significant bit first with state 69
 * and compared against pred_mode.
 * NOTE(review): presumably returns pred_mode when the first bin is set, and
 * mode + 1 when mode >= pred_mode, else mode -- confirm. */
5135 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5138 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5141 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5142 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5143 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5145 if( mode >= pred_mode )
/* Decodes the intra chroma prediction mode with CABAC.
 * The context of the first bin depends on the left and top neighbours being in
 * the current slice and having a non-zero entry in chroma_pred_mode_table.
 * Up to two further bins with state 64+3 follow.
 * NOTE(review): presumably a truncated unary code returning 0..3 -- confirm. */
5151 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5152 const int mba_xy = h->left_mb_xy[0];
5153 const int mbb_xy = h->top_mb_xy;
5157 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5158 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5161 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5164 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5167 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5169 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/* Decodes the four luma bits of the coded block pattern with CABAC.
 * Each bit is decoded with cabac_state[73 + ctx], where ctx is built from the
 * block to the left and the block above: either a bit of the neighbouring
 * macroblock cbp or a bit already decoded for this macroblock.
 * A cleared neighbour bit raises the context index.
 * NOTE(review): presumably returns cbp -- confirm. */
5175 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5176 int cbp_b, cbp_a, ctx, cbp = 0;
// A neighbour outside the current slice is replaced by -1, so all its bits read as set.
5178 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5179 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// Bit 0: left is bit 1 of the left macroblock, top is bit 2 of the top macroblock.
5181 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5182 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
// Bit 1: left is bit 0 of this macroblock, top is bit 3 of the top macroblock.
5183 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5184 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
// Bit 2: left is bit 3 of the left macroblock, top is bit 0 of this macroblock.
5185 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5186 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
// Bit 3: both neighbours are bits of this macroblock.
5187 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5188 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/* Decodes the chroma part of the coded block pattern with CABAC.
 * cbp_a and cbp_b are the two chroma cbp bits (bits 4 and 5) of the left and
 * top macroblocks.
 * The first bin uses a context based on those values being non-zero.
 * The second bin uses a context based on them being equal to 2, and the
 * function then returns 1 plus that bin.
 * NOTE(review): presumably returns 0 when the first bin is 0, and the second
 * context starts at offset 4 -- confirm. */
5191 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5195 cbp_a = (h->left_cbp>>4)&0x03;
5196 cbp_b = (h-> top_cbp>>4)&0x03;
5199 if( cbp_a > 0 ) ctx++;
5200 if( cbp_b > 0 ) ctx += 2;
5201 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5205 if( cbp_a == 2 ) ctx++;
5206 if( cbp_b == 2 ) ctx += 2;
5207 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/* Decodes the macroblock QP delta with CABAC.
 * Bins are read with cabac_state[60 + ctx] while they are 1; the starting
 * context depends on h->last_qscale_diff being non-zero.
 * A count above 102 is treated as an overflow so that corrupt data cannot
 * loop forever.
 * The visible return maps the count to a negative delta -(val + 1)/2.
 * NOTE(review): presumably the other parity of val yields the positive delta
 * (val + 1)/2 -- confirm. */
5209 static int decode_cabac_mb_dqp( H264Context *h) {
5213 if( h->last_qscale_diff != 0 )
5216 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5222 if(val > 102) //prevent infinite loop
5229 return -(val + 1)/2;
/* Decodes the sub-macroblock type of a P macroblock with CABAC.
 * Up to three bins are read, using states 21, 22 and 23 in that order; each
 * bin either ends the decision or leads to the next one.
 * NOTE(review): the returned values are presumably the sub type indices
 * 0..3 -- confirm against the type table. */
5231 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5232 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5234 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5236 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/* Decodes the sub-macroblock type of a B macroblock with CABAC.
 * Returns 0 for B_Direct_8x8, 1 or 2 for B_L0_8x8 and B_L1_8x8, 11 or 12 for
 * B_L1_4x4 and B_Bi_4x4. The remaining types are built from a base value plus
 * two more bins read with state 39.
 * NOTE(review): the base values of type on the two remaining paths are
 * presumably 3 and 7 -- confirm. */
5240 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5242 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5243 return 0; /* B_Direct_8x8 */
5244 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5245 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5247 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5248 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5249 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Two more bins add 0..3 to the base value.
5252 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5253 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/* Decodes the 8x8 transform flag of a macroblock with CABAC.
 * The state index is 399 plus h->neighbor_transform_size.
 * Returns the decoded bit. */
5257 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5258 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5261 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5262 int refa = h->ref_cache[list][scan8[n] - 1];
5263 int refb = h->ref_cache[list][scan8[n] - 8];
5267 if( h->slice_type == B_TYPE) {
5268 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5270 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5279 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5285 if(ref >= 32 /*h->ref_list[list]*/){
5286 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5287 return 0; //FIXME we should return -1 and check the return everywhere
5293 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5294 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5295 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5296 int ctxbase = (l == 0) ? 40 : 47;
5301 else if( amvd > 32 )
5306 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5311 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5319 while( get_cabac_bypass( &h->cabac ) ) {
5323 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5328 if( get_cabac_bypass( &h->cabac ) )
5332 return get_cabac_bypass_sign( &h->cabac, -mvd );
5335 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5340 nza = h->left_cbp&0x100;
5341 nzb = h-> top_cbp&0x100;
5342 } else if( cat == 1 || cat == 2 ) {
5343 nza = h->non_zero_count_cache[scan8[idx] - 1];
5344 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5345 } else if( cat == 3 ) {
5346 nza = (h->left_cbp>>(6+idx))&0x01;
5347 nzb = (h-> top_cbp>>(6+idx))&0x01;
5350 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5351 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5360 return ctx + 4 * cat;
5363 DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
5364 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5365 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5366 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5367 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/*
 * Decodes the CABAC coded residual of one block into block[].
 *
 *   cat        block category, see the table further down
 *   n          block index within the category
 *   scantable  maps a coefficient's scan position to its index in block[]
 *   qmul       multiplier table applied to the decoded levels as
 *              (level * qmul[j] + 32) >> 6; the DC callers in this file
 *              pass NULL
 *   max_coeff  number of coefficients of the block
 *
 * Besides block[], the function updates h->non_zero_count_cache and
 * h->cbp_table[mb_xy].
 *
 * NOTE(review): the embedded listing numbers have gaps (e.g. 5374 -> 5376),
 * so short lines such as closing braces, small declarations, else branches
 * and early returns appear to be missing from this copy. Comments below only
 * describe what the visible lines show.
 */
5370 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5371 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* cabac_state offsets of the significance flag contexts, [MB_FIELD][cat] */
5372 static const int significant_coeff_flag_offset[2][6] = {
5373 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5374 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
/* cabac_state offsets of the last-coefficient flag contexts, [MB_FIELD][cat] */
5376 static const int last_coeff_flag_offset[2][6] = {
5377 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5378 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
/* cabac_state offsets of the absolute level contexts, [cat] */
5380 static const int coeff_abs_level_m1_offset[6] = {
5381 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per scan position context offset of the significance flag for 8x8 blocks,
 * [MB_FIELD][position] */
5383 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5384 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5385 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5386 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5387 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5388 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5389 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5390 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5391 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
/* coeff_count: number of significant coefficients found so far; their scan
 * positions are stored in index[] */
5397 int coeff_count = 0;
5400 int abslevelgt1 = 0;
5402 uint8_t *significant_coeff_ctx_base;
5403 uint8_t *last_coeff_ctx_base;
5404 uint8_t *abs_level_m1_ctx_base;
/* With CABAC_ON_STACK the range/low/bytestream fields of h->cabac are copied
 * into a local cc here and written back before leaving the function. */
5407 #define CABAC_ON_STACK
5409 #ifdef CABAC_ON_STACK
5412 cc.range = h->cabac.range;
5413 cc.low = h->cabac.low;
5414 cc.bytestream= h->cabac.bytestream;
5416 #define CC &h->cabac
5420 /* cat: 0-> DC 16x16 n = 0
5421 * 1-> AC 16x16 n = luma4x4idx
5422 * 2-> Luma4x4 n = luma4x4idx
5423 * 3-> DC Chroma n = iCbCr
5424 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5425 * 5-> Luma8x8 n = 4 * luma8x8idx
5428 /* read coded block flag */
/* A coded block flag of 0 clears the block's non-zero count and restores the
 * decoder state; presumably the function then returns (the return line is
 * not visible here). */
5430 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5431 if( cat == 1 || cat == 2 )
5432 h->non_zero_count_cache[scan8[n]] = 0;
5434 h->non_zero_count_cache[scan8[16+n]] = 0;
5435 #ifdef CABAC_ON_STACK
5436 h->cabac.range = cc.range ;
5437 h->cabac.low = cc.low ;
5438 h->cabac.bytestream= cc.bytestream;
/* select the context tables for this category and field/frame mode */
5444 significant_coeff_ctx_base = h->cabac_state
5445 + significant_coeff_flag_offset[MB_FIELD][cat];
5446 last_coeff_ctx_base = h->cabac_state
5447 + last_coeff_flag_offset[MB_FIELD][cat];
5448 abs_level_m1_ctx_base = h->cabac_state
5449 + coeff_abs_level_m1_offset[cat];
/* DECODE_SIGNIFICANCE walks the first `coefs` scan positions; each position
 * whose significance flag is set is appended to index[], followed by a
 * "last" flag read. The handling of a set "last" flag is on lines not
 * visible here. After the loop, position max_coeff-1 is appended when the
 * loop ran to the end. */
5452 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5453 for(last= 0; last < coefs; last++) { \
5454 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5455 if( get_cabac( CC, sig_ctx )) { \
5456 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5457 index[coeff_count++] = last; \
5458 if( get_cabac( CC, last_ctx ) ) { \
5464 if( last == max_coeff -1 ) {\
5465 index[coeff_count++] = last;\
5467 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 builds use assembly versions of the significance decoding; otherwise
 * the macro above is used with the 8x8 tables or with the scan position
 * itself as context offset. */
5468 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5469 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5471 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5473 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5475 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5478 assert(coeff_count > 0);
/* record that the block is coded: cbp_table bit 8 or bit 6+n, or the
 * coefficient count in non_zero_count_cache (the conditions selecting the
 * category are partly on lines not visible here) */
5481 h->cbp_table[mb_xy] |= 0x100;
5482 else if( cat == 1 || cat == 2 )
5483 h->non_zero_count_cache[scan8[n]] = coeff_count;
5485 h->cbp_table[mb_xy] |= 0x40 << n;
5487 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5490 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* decode the levels, from the last significant coefficient to the first */
5493 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5494 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5495 int j= scantable[index[coeff_count]];
/* first decision 0: only the sign is read; one store keeps the bare value,
 * the other applies qmul (presumably selected by qmul being NULL, the test
 * is not visible here) */
5497 if( get_cabac( CC, ctx ) == 0 ) {
5499 block[j] = get_cabac_bypass_sign( CC, -1);
5501 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
/* larger levels: context coded decisions while coeff_abs < 15, then a
 * bypass coded escape */
5507 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5508 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5512 if( coeff_abs >= 15 ) {
5514 while( get_cabac_bypass( CC ) ) {
5520 coeff_abs += coeff_abs + get_cabac_bypass( CC );
/* sign bit, then store the level either bare or scaled by qmul */
5526 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5527 else block[j] = coeff_abs;
5529 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5530 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the local decoder state back to the context */
5536 #ifdef CABAC_ON_STACK
5537 h->cabac.range = cc.range ;
5538 h->cabac.low = cc.low ;
5539 h->cabac.bytestream= cc.bytestream;
5544 static inline void compute_mb_neighbors(H264Context *h)
5546 MpegEncContext * const s = &h->s;
5547 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5548 h->top_mb_xy = mb_xy - s->mb_stride;
5549 h->left_mb_xy[0] = mb_xy - 1;
5551 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5552 const int top_pair_xy = pair_xy - s->mb_stride;
5553 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5554 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5555 const int curr_mb_frame_flag = !MB_FIELD;
5556 const int bottom = (s->mb_y & 1);
5558 ? !curr_mb_frame_flag // bottom macroblock
5559 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5561 h->top_mb_xy -= s->mb_stride;
5563 if (left_mb_frame_flag != curr_mb_frame_flag) {
5564 h->left_mb_xy[0] = pair_xy - 1;
5566 } else if (FIELD_PICTURE) {
5567 h->top_mb_xy -= s->mb_stride;
/**
 * Decodes a macroblock.
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
/*
 * CABAC macroblock parser for the macroblock at (s->mb_x, s->mb_y).
 *
 * In the order of the visible code it reads: the skip flag and, in MBAFF
 * frames, the field decoding flag; the macroblock type; for I_PCM the raw
 * samples; for intra macroblocks the prediction modes; for inter macroblocks
 * the sub types, reference indices and motion vector differences; the coded
 * block pattern; the 8x8 transform flag; the quantizer delta; and finally the
 * residual blocks. Results go to h->mb, the caches in h and the per-picture
 * tables (mb_type, qscale_table, cbp_table, chroma_pred_mode_table, ...).
 *
 * The visible error paths return -1.
 *
 * NOTE(review): the embedded listing numbers have gaps, so short lines
 * (braces, else branches, small declarations, some returns) appear to be
 * missing from this copy. Comments below only describe the visible lines.
 */
5576 static int decode_mb_cabac(H264Context *h) {
5577 MpegEncContext * const s = &h->s;
5578 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5579 int mb_type, partition_count, cbp = 0;
5580 int dct8x8_allowed= h->pps.transform_8x8_mode;
5582 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5584 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* skip handling, only for slices that are neither I nor SI */
5585 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5587 /* a skipped mb needs the aff flag from the following mb */
5588 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5589 predict_field_decoding_flag(h);
/* the skip flag of a bottom macroblock was already read together with its
 * skipped top macroblock (see next_mb_skipped below) */
5590 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5591 skip = h->next_mb_skipped;
5593 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5594 /* read skip flags */
5596 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5597 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5598 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5599 if(h->next_mb_skipped)
5600 predict_field_decoding_flag(h);
5602 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* per macroblock state reset; presumably part of the skipped macroblock
 * path, the enclosing condition is not visible here */
5607 h->cbp_table[mb_xy] = 0;
5608 h->chroma_pred_mode_table[mb_xy] = 0;
5609 h->last_qscale_diff = 0;
/* field decoding flag: read for the top macroblock of a pair, otherwise
 * derived from the picture structure */
5616 if( (s->mb_y&1) == 0 )
5618 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5620 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5622 h->prev_mb_skipped = 0;
/* neighbour indices are needed by the macroblock type contexts */
5624 compute_mb_neighbors(h);
5625 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5626 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* translate the parsed type number through the table of the slice type;
 * intra types found in B/P slices jump to decode_intra_mb */
5630 if( h->slice_type == B_TYPE ) {
5632 partition_count= b_mb_type_info[mb_type].partition_count;
5633 mb_type= b_mb_type_info[mb_type].type;
5636 goto decode_intra_mb;
5638 } else if( h->slice_type == P_TYPE ) {
5640 partition_count= p_mb_type_info[mb_type].partition_count;
5641 mb_type= p_mb_type_info[mb_type].type;
5644 goto decode_intra_mb;
5647 assert(h->slice_type == I_TYPE);
5649 partition_count = 0;
5650 cbp= i_mb_type_info[mb_type].cbp;
5651 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5652 mb_type= i_mb_type_info[mb_type].type;
5655 mb_type |= MB_TYPE_INTERLACED;
5657 h->slice_table[ mb_xy ]= h->slice_num;
/* I_PCM: the samples are copied straight from the byte stream into h->mb
 * and the CABAC decoder is re-initialised behind them */
5659 if(IS_INTRA_PCM(mb_type)) {
5663 // We assume these blocks are very rare so we do not optimize it.
5664 // FIXME The two following lines get the bitstream position in the cabac
5665 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5666 ptr= h->cabac.bytestream;
5667 if(h->cabac.low&0x1) ptr--;
5669 if(h->cabac.low&0x1FF) ptr--;
5672 // The pixels are stored in the same order as levels in h->mb array.
5673 for(y=0; y<16; y++){
5674 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5675 for(x=0; x<16; x++){
5676 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5677 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5681 const int index= 256 + 4*(y&3) + 32*(y>>2);
5683 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5684 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5688 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5690 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5691 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5695 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5697 // All blocks are present
5698 h->cbp_table[mb_xy] = 0x1ef;
5699 h->chroma_pred_mode_table[mb_xy] = 0;
5700 // In deblocking, the quantizer is 0
5701 s->current_picture.qscale_table[mb_xy]= 0;
5702 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5703 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5704 // All coeffs are present
5705 memset(h->non_zero_count[mb_xy], 16, 16);
5706 s->current_picture.mb_type[mb_xy]= mb_type;
/* the reference counts are doubled here and halved again at the end of the
 * function; the guarding condition is not visible (presumably MBAFF field
 * macroblocks, confirm) */
5711 h->ref_count[0] <<= 1;
5712 h->ref_count[1] <<= 1;
5715 fill_caches(h, mb_type, 0);
/* intra macroblock: 4x4/8x8 luma prediction modes or the 16x16 mode, then
 * the chroma prediction mode; each is validated and -1 is returned on
 * failure */
5717 if( IS_INTRA( mb_type ) ) {
5719 if( IS_INTRA4x4( mb_type ) ) {
5720 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5721 mb_type |= MB_TYPE_8x8DCT;
5722 for( i = 0; i < 16; i+=4 ) {
5723 int pred = pred_intra_mode( h, i );
5724 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5725 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5728 for( i = 0; i < 16; i++ ) {
5729 int pred = pred_intra_mode( h, i );
5730 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5732 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5735 write_back_intra_pred_mode(h);
5736 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5738 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5739 if( h->intra16x16_pred_mode < 0 ) return -1;
5741 h->chroma_pred_mode_table[mb_xy] =
5742 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5744 pred_mode= check_intra_pred_mode( h, pred_mode );
5745 if( pred_mode < 0 ) return -1;
5746 h->chroma_pred_mode= pred_mode;
/* four 8x8 partitions: sub types first, then all reference indices, then
 * all motion vector differences */
5747 } else if( partition_count == 4 ) {
5748 int i, j, sub_partition_count[4], list, ref[2][4];
5750 if( h->slice_type == B_TYPE ) {
5751 for( i = 0; i < 4; i++ ) {
5752 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5753 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5754 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* if any partition is direct, run direct prediction and mark the direct
 * partitions in direct_cache */
5756 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5757 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5758 pred_direct_motion(h, &mb_type);
5759 h->ref_cache[0][scan8[4]] =
5760 h->ref_cache[1][scan8[4]] =
5761 h->ref_cache[0][scan8[12]] =
5762 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5763 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5764 for( i = 0; i < 4; i++ )
5765 if( IS_DIRECT(h->sub_mb_type[i]) )
5766 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5770 for( i = 0; i < 4; i++ ) {
5771 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5772 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5773 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices; a reference is only read when the list has more than
 * one entry */
5777 for( list = 0; list < h->list_count; list++ ) {
5778 for( i = 0; i < 4; i++ ) {
5779 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5780 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5781 if( h->ref_count[list] > 1 )
5782 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5788 h->ref_cache[list][ scan8[4*i]+1 ]=
5789 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5794 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vectors: prediction plus decoded difference, written to mv_cache;
 * the difference itself is kept in mvd_cache */
5796 for(list=0; list<h->list_count; list++){
5798 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5799 if(IS_DIRECT(h->sub_mb_type[i])){
5800 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5804 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5805 const int sub_mb_type= h->sub_mb_type[i];
5806 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5807 for(j=0; j<sub_partition_count[i]; j++){
5810 const int index= 4*i + block_width*j;
5811 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5812 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5813 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5815 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5816 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5817 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the vector over the cache entries covered by the sub partition
 * shape */
5819 if(IS_SUB_8X8(sub_mb_type)){
5821 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5823 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5826 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5828 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5829 }else if(IS_SUB_8X4(sub_mb_type)){
5830 mv_cache[ 1 ][0]= mx;
5831 mv_cache[ 1 ][1]= my;
5833 mvd_cache[ 1 ][0]= mx - mpx;
5834 mvd_cache[ 1 ][1]= my - mpy;
5835 }else if(IS_SUB_4X8(sub_mb_type)){
5836 mv_cache[ 8 ][0]= mx;
5837 mv_cache[ 8 ][1]= my;
5839 mvd_cache[ 8 ][0]= mx - mpx;
5840 mvd_cache[ 8 ][1]= my - mpy;
5842 mv_cache[ 0 ][0]= mx;
5843 mv_cache[ 0 ][1]= my;
5845 mvd_cache[ 0 ][0]= mx - mpx;
5846 mvd_cache[ 0 ][1]= my - mpy;
/* partition not predicted from this list: zero its 2x2 mv and mvd entries */
5849 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5850 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5851 p[0] = p[1] = p[8] = p[9] = 0;
5852 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* whole macroblock direct: predicted motion, zero differences */
5856 } else if( IS_DIRECT(mb_type) ) {
5857 pred_direct_motion(h, &mb_type);
5858 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5859 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5860 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* 16x16, 16x8 and 8x16 partitions: for each shape all reference indices
 * are read first, then all motion vector differences */
5862 int list, mx, my, i, mpx, mpy;
5863 if(IS_16X16(mb_type)){
5864 for(list=0; list<h->list_count; list++){
5865 if(IS_DIR(mb_type, 0, list)){
5866 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5867 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5869 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5871 for(list=0; list<h->list_count; list++){
5872 if(IS_DIR(mb_type, 0, list)){
5873 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5875 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5876 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5877 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5879 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5880 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5882 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5885 else if(IS_16X8(mb_type)){
5886 for(list=0; list<h->list_count; list++){
5888 if(IS_DIR(mb_type, i, list)){
5889 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5890 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5892 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5895 for(list=0; list<h->list_count; list++){
5897 if(IS_DIR(mb_type, i, list)){
5898 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5899 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5900 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5901 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5903 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5904 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5906 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5907 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5912 assert(IS_8X16(mb_type));
5913 for(list=0; list<h->list_count; list++){
5915 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5916 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5917 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5919 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5922 for(list=0; list<h->list_count; list++){
5924 if(IS_DIR(mb_type, i, list)){
5925 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5926 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5927 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5929 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5930 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5931 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5933 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5934 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
/* inter macroblocks: store the motion data in the picture tables */
5941 if( IS_INTER( mb_type ) ) {
5942 h->chroma_pred_mode_table[mb_xy] = 0;
5943 write_back_motion( h, mb_type );
/* the cbp is parsed for everything except intra 16x16, where it was taken
 * from i_mb_type_info above; luma in bits 0-3, chroma from bit 4 */
5946 if( !IS_INTRA16x16( mb_type ) ) {
5947 cbp = decode_cabac_mb_cbp_luma( h );
5948 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5951 h->cbp_table[mb_xy] = h->cbp = cbp;
/* non-intra macroblocks with coded luma may signal the 8x8 transform */
5953 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5954 if( decode_cabac_mb_transform_size( h ) )
5955 mb_type |= MB_TYPE_8x8DCT;
5957 s->current_picture.mb_type[mb_xy]= mb_type;
/* residual data: scan tables depend on field/frame coding and on qscale
 * being zero */
5959 if( cbp || IS_INTRA16x16( mb_type ) ) {
5960 const uint8_t *scan, *scan8x8, *dc_scan;
5961 const uint32_t *qmul;
5964 if(IS_INTERLACED(mb_type)){
5965 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5966 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5967 dc_scan= luma_dc_field_scan;
5969 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5970 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5971 dc_scan= luma_dc_zigzag_scan;
/* quantizer delta; INT_MIN signals a decoding failure */
5974 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5975 if( dqp == INT_MIN ){
5976 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap s->qscale back into 0..51 (the line applying dqp is not visible
 * here) */
5980 if(((unsigned)s->qscale) > 51){
5981 if(s->qscale<0) s->qscale+= 52;
5982 else s->qscale-= 52;
5984 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5985 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* intra 16x16: one DC block (cat 0) and 16 AC blocks (cat 1) of 15
 * coefficients each, scanned from position 1 */
5987 if( IS_INTRA16x16( mb_type ) ) {
5989 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5990 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5993 qmul = h->dequant4_coeff[0][s->qscale];
5994 for( i = 0; i < 16; i++ ) {
5995 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5996 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5999 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other types: per coded 8x8 quadrant either one 8x8 block (cat 5) or four
 * 4x4 blocks (cat 2); uncoded quadrants get zero counts */
6003 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6004 if( cbp & (1<<i8x8) ) {
6005 if( IS_8x8DCT(mb_type) ) {
6006 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6007 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6009 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6010 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6011 const int index = 4*i8x8 + i4x4;
6012 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6014 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6015 //STOP_TIMER("decode_residual")
6019 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6020 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC (cat 3), one block per plane */
6027 for( c = 0; c < 2; c++ ) {
6028 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6029 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
/* chroma AC (cat 4), four blocks per plane, using the chroma qp */
6035 for( c = 0; c < 2; c++ ) {
6036 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6037 for( i = 0; i < 4; i++ ) {
6038 const int index = 16 + 4 * c + i;
6039 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6040 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
/* no chroma AC: clear the chroma non-zero counts */
6044 uint8_t * const nnz= &h->non_zero_count_cache[0];
6045 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6046 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* nothing coded at all: clear every non-zero count and the stored delta */
6049 uint8_t * const nnz= &h->non_zero_count_cache[0];
6050 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6051 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6052 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6053 h->last_qscale_diff = 0;
6056 s->current_picture.qscale_table[mb_xy]= s->qscale;
6057 write_back_non_zero_count(h);
/* undo the doubling of the reference counts done before fill_caches() */
6060 h->ref_count[0] >>= 1;
6061 h->ref_count[1] >>= 1;
/*
 * Deblocks one vertical luma edge, i.e. the edge between pix[-1] and pix[0],
 * over 16 rows.
 *
 *   pix     first sample right of the edge
 *   stride  distance between rows
 *   bS      boundary strengths, one per group of rows
 *   qp      quantizer used to look up alpha, beta and tc0
 *
 * One path builds tc[] from tc0_table (-1 for bS == 0) and calls the dsp
 * routine h264_h_loop_filter_luma; the other filters in C and, per the
 * comment below, is the bS = 4 case. The branch condition itself is not
 * visible in this copy (the embedded listing numbers have gaps).
 *
 * NOTE(review): the tprintf in the C path prints `i`, which the visible
 * code only assigns in the tc[] loop of the other path; confirm before
 * enabling tracing.
 */
6068 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6070 const int index_a = qp + h->slice_alpha_c0_offset;
6071 const int alpha = (alpha_table+52)[index_a];
6072 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6077 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6078 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6080 /* 16px edge length, because bS=4 is triggered by being at
6081 * the edge of an intra MB, so all 4 bS are the same */
6082 for( d = 0; d < 16; d++ ) {
/* p* are the samples left of the edge, q* the samples right of it */
6083 const int p0 = pix[-1];
6084 const int p1 = pix[-2];
6085 const int p2 = pix[-3];
6087 const int q0 = pix[0];
6088 const int q1 = pix[1];
6089 const int q2 = pix[2];
/* the edge is only filtered when the step across it and the gradients on
 * both sides are below the thresholds */
6091 if( FFABS( p0 - q0 ) < alpha &&
6092 FFABS( p1 - p0 ) < beta &&
6093 FFABS( q1 - q0 ) < beta ) {
/* small step: up to three samples per side are rewritten, depending on the
 * flatness of that side; otherwise only p0 and q0 */
6095 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6096 if( FFABS( p2 - p0 ) < beta)
6098 const int p3 = pix[-4];
6100 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6101 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6102 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6105 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6107 if( FFABS( q2 - q0 ) < beta)
6109 const int q3 = pix[3];
6111 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6112 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6113 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6116 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6120 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6121 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6123 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6129 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6131 const int index_a = qp + h->slice_alpha_c0_offset;
6132 const int alpha = (alpha_table+52)[index_a];
6133 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6138 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6139 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6141 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/*
 * Deblocks a vertical luma edge (between pix[-1] and pix[0]) row by row for
 * 16 rows, with a boundary strength and a quantizer selected per row.
 *
 *   bS  eight boundary strengths; the index starts as i >> 1 and may get the
 *       low bit of the row number OR-ed in (the condition is not visible in
 *       this copy)
 *   qp  two quantizers; the index is i >> 3 for field macroblocks and i & 1
 *       otherwise
 *
 * Rows with bS == 0 are handled by a branch whose body is not visible here
 * (presumably skipped). bS < 4 uses the clipped normal filter, anything else
 * the strong filter.
 *
 * NOTE(review): the embedded listing numbers have gaps, so short lines
 * (declarations, braces, else branches, the tc computation) are missing.
 */
6145 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6147 for( i = 0; i < 16; i++, pix += stride) {
6153 int bS_index = (i >> 1);
6156 bS_index |= (i & 1);
6159 if( bS[bS_index] == 0 ) {
6163 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6164 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6165 alpha = (alpha_table+52)[index_a];
6166 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filter: corrections are clipped to +/-tc0 (p1, q1) and +/-tc
 * (p0, q0) */
6168 if( bS[bS_index] < 4 ) {
6169 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6170 const int p0 = pix[-1];
6171 const int p1 = pix[-2];
6172 const int p2 = pix[-3];
6173 const int q0 = pix[0];
6174 const int q1 = pix[1];
6175 const int q2 = pix[2];
6177 if( FFABS( p0 - q0 ) < alpha &&
6178 FFABS( p1 - p0 ) < beta &&
6179 FFABS( q1 - q0 ) < beta ) {
6183 if( FFABS( p2 - p0 ) < beta ) {
6184 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6187 if( FFABS( q2 - q0 ) < beta ) {
6188 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6192 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6193 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6194 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6195 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter: same formulas as the C path of filter_mb_edgev */
6198 const int p0 = pix[-1];
6199 const int p1 = pix[-2];
6200 const int p2 = pix[-3];
6202 const int q0 = pix[0];
6203 const int q1 = pix[1];
6204 const int q2 = pix[2];
6206 if( FFABS( p0 - q0 ) < alpha &&
6207 FFABS( p1 - p0 ) < beta &&
6208 FFABS( q1 - q0 ) < beta ) {
6210 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6211 if( FFABS( p2 - p0 ) < beta)
6213 const int p3 = pix[-4];
6215 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6216 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6217 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6220 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6222 if( FFABS( q2 - q0 ) < beta)
6224 const int q3 = pix[3];
6226 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6227 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6228 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6231 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6235 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6236 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6238 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Deblocks a vertical chroma edge (between pix[-1] and pix[0]) row by row
 * for 8 rows, with a boundary strength and a quantizer selected per row.
 *
 *   bS  eight boundary strengths (the bS_index computation is not visible in
 *       this copy)
 *   qp  two quantizers; the index is i >> 2 for field macroblocks and i & 1
 *       otherwise
 *
 * Only p0 and q0 are modified. bS < 4 clips the correction to +/-tc with
 * tc = tc0 + 1; otherwise the unclipped intra formula is used. Rows with
 * bS == 0 are handled by a branch whose body is not visible here.
 *
 * NOTE(review): the embedded listing numbers have gaps, so short lines are
 * missing from this copy.
 */
6243 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6245 for( i = 0; i < 8; i++, pix += stride) {
6253 if( bS[bS_index] == 0 ) {
6257 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6258 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6259 alpha = (alpha_table+52)[index_a];
6260 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6262 if( bS[bS_index] < 4 ) {
6263 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6264 const int p0 = pix[-1];
6265 const int p1 = pix[-2];
6266 const int q0 = pix[0];
6267 const int q1 = pix[1];
6269 if( FFABS( p0 - q0 ) < alpha &&
6270 FFABS( p1 - p0 ) < beta &&
6271 FFABS( q1 - q0 ) < beta ) {
6272 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6274 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6275 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6276 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6279 const int p0 = pix[-1];
6280 const int p1 = pix[-2];
6281 const int q0 = pix[0];
6282 const int q1 = pix[1];
6284 if( FFABS( p0 - q0 ) < alpha &&
6285 FFABS( p1 - p0 ) < beta &&
6286 FFABS( q1 - q0 ) < beta ) {
6288 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6289 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6290 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/*
 * Deblocks one horizontal luma edge, i.e. the edge between the row above pix
 * and the row at pix, over 16 columns.
 *
 *   pix     first sample below the edge
 *   stride  distance between rows (pix_next)
 *   bS      boundary strengths, one per group of columns
 *   qp      quantizer used to look up alpha, beta and tc0
 *
 * One path builds tc[] from tc0_table (-1 for bS == 0) and calls the dsp
 * routine h264_v_loop_filter_luma; the other filters in C with the same
 * formulas as filter_mb_edgev. The branch condition is not visible in this
 * copy (the embedded listing numbers have gaps).
 *
 * NOTE(review): the tprintf in the C path prints `i` and bS[i], but the
 * visible code only assigns `i` in the tc[] loop of the other path; confirm
 * before enabling tracing.
 */
6296 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6298 const int index_a = qp + h->slice_alpha_c0_offset;
6299 const int alpha = (alpha_table+52)[index_a];
6300 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6301 const int pix_next = stride;
6306 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6307 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6309 /* 16px edge length, see filter_mb_edgev */
6310 for( d = 0; d < 16; d++ ) {
/* p* are the samples above the edge, q* the samples below it */
6311 const int p0 = pix[-1*pix_next];
6312 const int p1 = pix[-2*pix_next];
6313 const int p2 = pix[-3*pix_next];
6314 const int q0 = pix[0];
6315 const int q1 = pix[1*pix_next];
6316 const int q2 = pix[2*pix_next];
6318 if( FFABS( p0 - q0 ) < alpha &&
6319 FFABS( p1 - p0 ) < beta &&
6320 FFABS( q1 - q0 ) < beta ) {
6322 const int p3 = pix[-4*pix_next];
6323 const int q3 = pix[ 3*pix_next];
/* small step: up to three samples per side are rewritten, depending on the
 * flatness of that side; otherwise only p0 and q0 */
6325 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6326 if( FFABS( p2 - p0 ) < beta) {
6328 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6329 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6330 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6333 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6335 if( FFABS( q2 - q0 ) < beta) {
6337 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6338 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6339 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6342 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6346 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6347 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6349 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/**
 * Deblock one chroma edge through the h264_v_loop_filter_chroma DSP functions.
 * alpha and beta are looked up from alpha_table / beta_table, indexed by qp
 * plus the slice alpha_c0 and beta offsets.
 * @param pix    first sample of the edge
 * @param stride distance in bytes between rows
 * @param bS     four filter strengths along the edge
 * @param qp     QP used for the table lookups
 */
6356 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6358 const int index_a = qp + h->slice_alpha_c0_offset;
6359 const int alpha = (alpha_table+52)[index_a];
6360 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i]: tc0_table entry for bS[i] plus 1, or 0 when bS[i] is 0 */
6365 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6366 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
/* NOTE(review): variant taking no tc; the condition selecting it is not visible here, presumably bS of 4 - confirm */
6368 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock one macroblock, using the DSP h264_loop_filter_strength routine to
 * compute the filter strengths of non-intra macroblocks.
 * The macroblock is handed to filter_mb() instead when it lies in the first
 * column or first row, when no h264_loop_filter_strength function is set,
 * when pps.chroma_qp_diff is set, or when deblocking_filter == 2 and the top
 * or left neighbour has a different slice_table entry.
 * MBAFF frames are not supported here (see the assert).
 */
6372 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6373 MpegEncContext * const s = &h->s;
6374 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6376 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6378 mb_xy = mb_x + mb_y*s->mb_stride;
/* cases this function does not handle are passed to filter_mb() */
6380 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6381 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6382 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6383 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6386 assert(!FRAME_MBAFF);
6388 mb_type = s->current_picture.mb_type[mb_xy];
/* qp: this macroblock, qp0: left neighbour, qp1: top neighbour; qpc* are the matching chroma QPs */
6389 qp = s->current_picture.qscale_table[mb_xy];
6390 qp0 = s->current_picture.qscale_table[mb_xy-1];
6391 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6392 qpc = get_chroma_qp( h, 0, qp );
6393 qpc0 = get_chroma_qp( h, 0, qp0 );
6394 qpc1 = get_chroma_qp( h, 0, qp1 );
/* QPs for the left and top macroblock edges: rounded average of both sides */
6395 qp0 = (qp + qp0 + 1) >> 1;
6396 qp1 = (qp + qp1 + 1) >> 1;
6397 qpc0 = (qpc + qpc0 + 1) >> 1;
6398 qpc1 = (qpc + qpc1 + 1) >> 1;
/* NOTE(review): the statement guarded by this threshold test is not visible here; presumably an early return when every QP is too low for the filter to change anything - confirm */
6399 qp_thresh = 15 - h->slice_alpha_c0_offset;
6400 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6401 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra macroblock: constant strengths, 4 on the left edge, bSH on the top edge (3 in field pictures, else 4), 3 on inner edges */
6404 if( IS_INTRA(mb_type) ) {
6405 int16_t bS4[4] = {4,4,4,4};
6406 int16_t bS3[4] = {3,3,3,3};
6407 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* with the 8x8 transform only luma edges 0 and 2 are filtered */
6408 if( IS_8x8DCT(mb_type) ) {
6409 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6410 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6411 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6412 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6414 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6415 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6416 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6417 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6418 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6419 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6420 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6421 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* chroma: edges 0 and 2 of both planes */
6423 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6424 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6425 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6426 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6427 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6428 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6429 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6430 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* bS[dir][edge][i]; bSv views each group of four int16 strengths as one uint64 so a whole edge can be set or tested at once */
6433 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6434 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
/* 8x8 transform with the three low cbp bits set: edges 0 and 2 get strength 2 in both directions */
6436 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6438 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6440 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6441 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6442 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6443 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6445 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6446 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6447 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6448 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* an intra neighbour overrides the strength of the shared macroblock edge */
6450 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6451 bSv[0][0] = 0x0004000400040004ULL;
6452 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6453 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER: acts only when at least one of the four strengths of the edge is nonzero; edge 0 uses the averaged neighbour QP (qp0 or qp1), inner edges use qp */
6455 #define FILTER(hv,dir,edge)\
6456 if(bSv[dir][edge]) {\
6457 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6459 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6460 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6466 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Deblock one macroblock: general path, including the MBAFF special cases.
 * For each direction (vertical edges first, then horizontal) the four filter
 * strengths bS[] of every edge are derived from intra status, nonzero
 * coefficient counts, reference indices and motion vectors, and the edge is
 * then filtered in luma and, on even edges, in both chroma planes.
 * Edge QPs are the rounded average of the QPs on both sides of the edge.
 */
6485 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6486 MpegEncContext * const s = &h->s;
6487 const int mb_xy= mb_x + mb_y*s->mb_stride;
6488 const int mb_type = s->current_picture.mb_type[mb_xy];
/* vertical mv difference threshold used in the bS checks below: 2 for interlaced macroblocks, else 4 */
6489 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6490 int first_vertical_edge_done = 0;
6492 /* FIXME: A given frame may occupy more than one position in
6493 * the reference list. So ref2frm should be populated with
6494 * frame numbers, not indices. */
6495 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6496 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6498 //for sufficiently low qp, filtering wouldn't do anything
6499 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6501 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6502 int qp = s->current_picture.qscale_table[mb_xy];
6504 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6505 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6511 // left mb is in picture
6512 && h->slice_table[mb_xy-1] != 255
6513 // and current and left pair do not have the same interlaced type
6514 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6515 // and left mb is in the same slice if deblocking_filter == 2
6516 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6517 /* First vertical edge is different in MBAFF frames
6518 * There are 8 different bS to compute and 2 different Qp
6520 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6521 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6526 int mb_qp, mbn0_qp, mbn1_qp;
6528 first_vertical_edge_done = 1;
6530 if( IS_INTRA(mb_type) )
6531 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6533 for( i = 0; i < 8; i++ ) {
6534 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6536 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6538 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6539 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6540 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* QPs against the two macroblocks of the left pair: index 0 is the upper one, 1 the lower one; qp is luma, bqp and rqp use chroma tables 0 and 1 */
6547 mb_qp = s->current_picture.qscale_table[mb_xy];
6548 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6549 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6550 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6551 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6552 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6553 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6554 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6555 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6556 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6557 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6558 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6559 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6562 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6563 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6564 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6565 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6566 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6568 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6569 for( dir = 0; dir < 2; dir++ )
/* mbm: the neighbour across the macroblock boundary for this direction (left for dir 0, top for dir 1) */
6572 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6573 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* the boundary edge (edge 0) is left out when that neighbour has slice_table 255 */
6574 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
/* a skipped 16x16 macroblock has only its boundary edge to consider */
6576 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6577 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6578 // how often to recheck mv-based bS when iterating between edges
6579 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6580 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6581 // how often to recheck mv-based bS when iterating along each edge
6582 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6584 if (first_vertical_edge_done) {
6586 first_vertical_edge_done = 0;
6589 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6592 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6593 && !IS_INTERLACED(mb_type)
6594 && IS_INTERLACED(mbm_type)
6596 // This is a special case in the norm where the filtering must
6597 // be done twice (one each of the field) even if we are in a
6598 // frame macroblock.
6600 static const int nnz_idx[4] = {4,5,6,3};
6601 unsigned int tmp_linesize = 2 * linesize;
6602 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6603 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* one pass per macroblock of the pair above; doubled strides plus a j-row offset make each pass touch every second line */
6608 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6609 if( IS_INTRA(mb_type) ||
6610 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6611 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6613 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6614 for( i = 0; i < 4; i++ ) {
6615 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6616 mbn_nnz[nnz_idx[i]] != 0 )
6622 // Do not use s->qscale as luma quantizer because it has not the same
6623 // value in IPCM macroblocks.
6624 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6625 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6626 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6627 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6628 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6629 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6630 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6631 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* regular case: edge 0 lies between this macroblock and mbm, edges 1..3 are inside this macroblock */
6638 for( edge = start; edge < edges; edge++ ) {
6639 /* mbn_xy: neighbor macroblock */
6640 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6641 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6645 if( (edge&1) && IS_8x8DCT(mb_type) )
6648 if( IS_INTRA(mb_type) ||
6649 IS_INTRA(mbn_type) ) {
6652 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6653 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6662 bS[0] = bS[1] = bS[2] = bS[3] = value;
6667 if( edge & mask_edge ) {
6668 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6671 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6672 bS[0] = bS[1] = bS[2] = bS[3] = 1;
/* one reference and motion vector comparison decides all four strengths of the edge */
6675 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6676 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6677 int bn_idx= b_idx - (dir ? 8:1);
6679 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6680 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6681 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6682 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6684 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* per 4-sample segment: b_idx is the block on this side of the edge in the caches, bn_idx the block across it */
6690 for( i = 0; i < 4; i++ ) {
6691 int x = dir == 0 ? edge : i;
6692 int y = dir == 0 ? i : edge;
6693 int b_idx= 8 + 4 + x + 8*y;
6694 int bn_idx= b_idx - (dir ? 8:1);
6696 if( h->non_zero_count_cache[b_idx] != 0 ||
6697 h->non_zero_count_cache[bn_idx] != 0 ) {
6703 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6704 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6705 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6706 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* NOTE(review): the statement taken when all four strengths are zero is not visible here; presumably the edge is skipped - confirm */
6714 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6719 // Do not use s->qscale as luma quantizer because it has not the same
6720 // value in IPCM macroblocks.
6721 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6722 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6723 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6724 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* vertical edge: luma always, chroma only on even edges */
6726 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6727 if( (edge&1) == 0 ) {
6728 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6729 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6730 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6731 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal edge: same scheme with row offsets */
6734 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6735 if( (edge&1) == 0 ) {
6736 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6737 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6738 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6739 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode the macroblocks of one slice.
 * With pps.cabac set the CABAC decoder is initialised and decode_mb_cabac()
 * is called per macroblock; otherwise decode_mb_cavlc() is used. Each decoded
 * macroblock is reconstructed with hl_decode_mb(). Decoded and damaged
 * macroblock ranges are reported through ff_er_add_slice().
 * In MBAFF frames a second macroblock is decoded after each first one.
 * NOTE(review): the return statements of the individual paths are not
 * visible here; only the trailing return -1 marked as not reached is.
 */
6746 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6747 MpegEncContext * const s = &h->s;
/* status bits passed to ff_er_add_slice are limited to AC_END and AC_ERROR for partitioned frames */
6748 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6752 if( h->pps.cabac ) {
/* CABAC data starts at the next byte boundary of the bit reader */
6756 align_get_bits( &s->gb );
6759 ff_init_cabac_states( &h->cabac);
6760 ff_init_cabac_decoder( &h->cabac,
6761 s->gb.buffer + get_bits_count(&s->gb)/8,
6762 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6763 /* calculate pre-state */
/* 460 context states, taken from the I table or from the P/B table selected by cabac_init_idc, scaled by s->qscale and clipped to 1..126 */
6764 for( i= 0; i < 460; i++ ) {
6766 if( h->slice_type == I_TYPE )
6767 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6769 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6772 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6774 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6779 int ret = decode_mb_cabac(h);
6781 //STOP_TIMER("decode_mb_cabac")
6783 if(ret>=0) hl_decode_mb(h);
6785 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6788 if(ret>=0) ret = decode_mb_cabac(h);
6790 if(ret>=0) hl_decode_mb(h);
6793 eos = get_cabac_terminate( &h->cabac );
/* failure: a macroblock decode error, or the CABAC reader went more than 2 bytes past the end of its buffer */
6795 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6796 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6797 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* end of a macroblock row: output the finished band */
6801 if( ++s->mb_x >= s->mb_width ) {
6803 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6805 if(FIELD_OR_MBAFF_PICTURE) {
/* the CABAC path ends on the terminate symbol or after the last row */
6810 if( eos || s->mb_y >= s->mb_height ) {
6811 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6812 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6819 int ret = decode_mb_cavlc(h);
6821 if(ret>=0) hl_decode_mb(h);
6823 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6825 ret = decode_mb_cavlc(h);
6827 if(ret>=0) hl_decode_mb(h);
6832 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6833 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6838 if(++s->mb_x >= s->mb_width){
6840 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6842 if(FIELD_OR_MBAFF_PICTURE) {
6845 if(s->mb_y >= s->mb_height){
6846 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* all bits consumed exactly at the last row: report the range up to the previous macroblock as ended */
6848 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6849 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6853 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* bit reader exhausted and no skip run pending: exact consumption is a clean end, anything else is reported as an error */
6860 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6861 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6862 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6863 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6867 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the loop below calls decode_mb() and contains text that is not valid C (s->?gb), so it is presumably compiled out - confirm */
6876 for(;s->mb_y < s->mb_height; s->mb_y++){
6877 for(;s->mb_x < s->mb_width; s->mb_x++){
6878 int ret= decode_mb(h);
6883 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6884 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6889 if(++s->mb_x >= s->mb_width){
6891 if(++s->mb_y >= s->mb_height){
6892 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6893 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6897 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6904 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6905 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6906 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6910 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6917 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6920 return -1; //not reached
/**
 * Read an unregistered user data SEI payload of size bytes.
 * At most sizeof(user_data)-1 bytes are copied into a local buffer. The text
 * after the first 16 bytes is scanned for an x264 version banner and, when a
 * non-negative build number is found, it is stored in h->x264_build.
 * NOTE(review): the skip_bits call at the end presumably discards payload
 * bytes that did not fit into the buffer; its loop header is not visible.
 */
6923 static int decode_unregistered_user_data(H264Context *h, int size){
6924 MpegEncContext * const s = &h->s;
/* 16 leading bytes followed by up to 256 bytes of text */
6925 uint8_t user_data[16+256];
6931 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6932 user_data[i]= get_bits(&s->gb, 8);
6936 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6937 if(e==1 && build>=0)
6938 h->x264_build= build;
6940 if(s->avctx->debug & FF_DEBUG_BUGS)
6941 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6944 skip_bits(&s->gb, 8);
/**
 * Parse the SEI messages of the current NAL unit.
 * Messages are read while more than 16 bits remain. The type and the size of
 * a message are each coded as a sum of bytes in which a byte of 255 means
 * that another byte follows. Unregistered user data goes to
 * decode_unregistered_user_data(); the other visible path skips the payload.
 */
6949 static int decode_sei(H264Context *h){
6950 MpegEncContext * const s = &h->s;
6952 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate the type: show_bits adds the byte, get_bits consumes it and tests for 255 */
6957 type+= show_bits(&s->gb, 8);
6958 }while(get_bits(&s->gb, 8) == 255);
/* same coding for the payload size in bytes */
6962 size+= show_bits(&s->gb, 8);
6963 }while(get_bits(&s->gb, 8) == 255);
6967 if(decode_unregistered_user_data(h, size) < 0)
6971 skip_bits(&s->gb, 8*size);
6974 //FIXME check bits here
6975 align_get_bits(&s->gb);
/**
 * Read the HRD parameters from the bitstream and discard them.
 * Only cpb_count is used, to know how many per-CPB entries to read; nothing
 * visible here is stored in sps.
 */
6981 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6982 MpegEncContext * const s = &h->s;
6984 cpb_count = get_ue_golomb(&s->gb) + 1;
6985 get_bits(&s->gb, 4); /* bit_rate_scale */
6986 get_bits(&s->gb, 4); /* cpb_size_scale */
/* one entry of three fields per CPB */
6987 for(i=0; i<cpb_count; i++){
6988 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6989 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6990 get_bits1(&s->gb); /* cbr_flag */
6992 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6993 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6994 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6995 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse the VUI parameters of a sequence parameter set.
 * Stored in sps: sar, timing_info_present_flag with num_units_in_tick,
 * time_scale and fixed_frame_rate_flag, bitstream_restriction_flag and
 * num_reorder_frames. All other fields are read and discarded.
 * An aspect_ratio_idc that is neither EXTENDED_SAR nor below 14, or a
 * num_reorder_frames above 16, is logged as an error.
 */
6998 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6999 MpegEncContext * const s = &h->s;
7000 int aspect_ratio_info_present_flag;
7001 unsigned int aspect_ratio_idc;
7002 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7004 aspect_ratio_info_present_flag= get_bits1(&s->gb);
/* sample aspect ratio: explicit 16 bit num and den, or an index into pixel_aspect[] */
7006 if( aspect_ratio_info_present_flag ) {
7007 aspect_ratio_idc= get_bits(&s->gb, 8);
7008 if( aspect_ratio_idc == EXTENDED_SAR ) {
7009 sps->sar.num= get_bits(&s->gb, 16);
7010 sps->sar.den= get_bits(&s->gb, 16);
7011 }else if(aspect_ratio_idc < 14){
7012 sps->sar= pixel_aspect[aspect_ratio_idc];
7014 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7021 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7023 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7024 get_bits1(&s->gb); /* overscan_appropriate_flag */
7027 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7028 get_bits(&s->gb, 3); /* video_format */
7029 get_bits1(&s->gb); /* video_full_range_flag */
7030 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7031 get_bits(&s->gb, 8); /* colour_primaries */
7032 get_bits(&s->gb, 8); /* transfer_characteristics */
7033 get_bits(&s->gb, 8); /* matrix_coefficients */
7037 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7038 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7039 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7042 sps->timing_info_present_flag = get_bits1(&s->gb);
7043 if(sps->timing_info_present_flag){
7044 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7045 sps->time_scale = get_bits_long(&s->gb, 32);
7046 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* NAL and VCL HRD blocks share one parser; low_delay_hrd_flag follows only if either was present */
7049 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7050 if(nal_hrd_parameters_present_flag)
7051 decode_hrd_parameters(h, sps);
7052 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7053 if(vcl_hrd_parameters_present_flag)
7054 decode_hrd_parameters(h, sps);
7055 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7056 get_bits1(&s->gb); /* low_delay_hrd_flag */
7057 get_bits1(&s->gb); /* pic_struct_present_flag */
7059 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7060 if(sps->bitstream_restriction_flag){
7061 unsigned int num_reorder_frames;
7062 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7063 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7064 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7065 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7066 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7067 num_reorder_frames= get_ue_golomb(&s->gb);
7068 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* the value is validated in a local before it is stored in sps */
7070 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7071 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7075 sps->num_reorder_frames= num_reorder_frames;
/**
 * Decode one scaling list into factors.
 * @param factors       destination, size entries
 * @param size          16 selects zigzag_scan, any other value zigzag_scan8x8
 * @param jvt_list      preset list, copied when the first decoded value is 0
 * @param fallback_list list copied when the list is flagged as not written
 */
7081 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7082 const uint8_t *jvt_list, const uint8_t *fallback_list){
7083 MpegEncContext * const s = &h->s;
7084 int i, last = 8, next = 8;
7085 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7086 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7087 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7089 for(i=0;i<size;i++){
/* each value is coded as a signed delta to the previous one, modulo 256 */
7091 next = (last + get_se_golomb(&s->gb)) & 0xff;
7092 if(!i && !next){ /* matrix not written, we use the preset one */
7093 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* a later value of 0 repeats the previous value */
7096 last = factors[scan[i]] = next ? next : last;
/**
 * Decode the scaling matrices of an SPS (is_sps != 0) or of a PPS.
 * The first list of each group falls back to the SPS matrices when parsing a
 * PPS whose SPS carried matrices, otherwise to the default tables; every
 * other 4x4 list falls back to the list decoded just before it.
 * When the bitstream flags no matrices and the SPS fallback applies, the SPS
 * matrices are copied into the destination.
 * @param scaling_matrix4 destination for the six 4x4 lists
 * @param scaling_matrix8 destination for the two 8x8 lists
 */
7100 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7101 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7102 MpegEncContext * const s = &h->s;
7103 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7104 const uint8_t *fallback[4] = {
7105 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7106 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7107 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7108 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7110 if(get_bits1(&s->gb)){
7111 sps->scaling_matrix_present |= is_sps;
7112 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7113 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7114 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7115 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7116 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7117 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* the 8x8 lists are coded in an SPS always, in a PPS only with transform_8x8_mode */
7118 if(is_sps || pps->transform_8x8_mode){
7119 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7120 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7122 } else if(fallback_sps) {
7123 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7124 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* Parameters: vec is the table of parameter set pointers, id the requested
 * slot, max the bound id is checked against, size the number of bytes to
 * allocate for a new entry and name the label used in log messages.
 * NOTE(review): the return statements are not visible here; presumably the
 * entry is returned, or NULL after either logged error - confirm. */
7129 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7132 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7133 const size_t size, const char *name)
7136 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* new entries are zero-initialised */
7141 vec[id] = av_mallocz(size);
7143 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a sequence parameter set into h->sps_buffers[sps_id].
 * The slot is obtained through alloc_parameter_set(). Out of range values of
 * poc_cycle_length, POC type, reference frame count and picture dimensions
 * are logged as errors. VUI parameters are parsed when flagged as present.
 */
7148 static inline int decode_seq_parameter_set(H264Context *h){
7149 MpegEncContext * const s = &h->s;
7150 int profile_idc, level_idc;
7151 unsigned int sps_id, tmp, mb_width, mb_height;
/* profile and level precede sps_id in the bitstream, so they are held in locals until the slot is known */
7155 profile_idc= get_bits(&s->gb, 8);
7156 get_bits1(&s->gb); //constraint_set0_flag
7157 get_bits1(&s->gb); //constraint_set1_flag
7158 get_bits1(&s->gb); //constraint_set2_flag
7159 get_bits1(&s->gb); //constraint_set3_flag
7160 get_bits(&s->gb, 4); // reserved
7161 level_idc= get_bits(&s->gb, 8);
7162 sps_id= get_ue_golomb(&s->gb);
7164 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7168 sps->profile_idc= profile_idc;
7169 sps->level_idc= level_idc;
/* extra fields of profiles 100 and above: only transform_bypass and the scaling matrices are kept */
7171 if(sps->profile_idc >= 100){ //high profile
7172 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7173 get_bits1(&s->gb); //residual_color_transform_flag
7174 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7175 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7176 sps->transform_bypass = get_bits1(&s->gb);
7177 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7179 sps->scaling_matrix_present = 0;
7181 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7182 sps->poc_type= get_ue_golomb(&s->gb);
7184 if(sps->poc_type == 0){ //FIXME #define
7185 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7186 } else if(sps->poc_type == 1){//FIXME #define
7187 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7188 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7189 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7190 tmp= get_ue_golomb(&s->gb);
/* the cycle length must fit the offset_for_ref_frame array */
7192 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7193 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7196 sps->poc_cycle_length= tmp;
7198 for(i=0; i<sps->poc_cycle_length; i++)
7199 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7200 }else if(sps->poc_type != 2){
7201 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7205 tmp= get_ue_golomb(&s->gb);
7206 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7207 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7210 sps->ref_frame_count= tmp;
7211 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7212 mb_width= get_ue_golomb(&s->gb) + 1;
7213 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard the multiplication by 16 against overflow before the dimensions are checked */
7214 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7215 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7216 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7219 sps->mb_width = mb_width;
7220 sps->mb_height= mb_height;
/* mb_aff is read from the bitstream only when frame_mbs_only_flag is 0 */
7222 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7223 if(!sps->frame_mbs_only_flag)
7224 sps->mb_aff= get_bits1(&s->gb);
7228 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7230 #ifndef ALLOW_INTERLACE
7232 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7234 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7235 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7237 sps->crop= get_bits1(&s->gb);
7239 sps->crop_left = get_ue_golomb(&s->gb);
7240 sps->crop_right = get_ue_golomb(&s->gb);
7241 sps->crop_top = get_ue_golomb(&s->gb);
7242 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left or top cropping is only warned about, not rejected */
7243 if(sps->crop_left || sps->crop_top){
7244 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7250 sps->crop_bottom= 0;
7253 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7254 if( sps->vui_parameters_present_flag )
7255 decode_vui_parameters(h, sps);
7257 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7258 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7259 sps_id, sps->profile_idc, sps->level_idc,
7261 sps->ref_frame_count,
7262 sps->mb_width, sps->mb_height,
7263 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7264 sps->direct_8x8_inference_flag ? "8B8" : "",
7265 sps->crop_left, sps->crop_right,
7266 sps->crop_top, sps->crop_bottom,
7267 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fill pps->chroma_qp_table[t]: entry i holds chroma_qp[] looked up at
 * i + index clipped to 0..51, where index is the chroma QP offset for table t.
 * NOTE(review): the loop stops at i == 254, so entry 255 is not written here,
 * while decode_picture_parameter_set copies 256 bytes of this table - confirm
 * whether the bound should be 256. */
7274 build_qp_table(PPS *pps, int t, int index)
7277 for(i = 0; i < 255; i++)
7278 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a picture parameter set into h->pps_buffers[pps_id].
 * The slot is obtained through alloc_parameter_set(). The referenced SPS must
 * already exist. Slice groups (FMO) are logged as unsupported.
 * @param bit_length bit position up to which the optional trailing fields
 *                   (transform_8x8_mode, scaling matrices, second chroma QP
 *                   offset) are read
 */
7281 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7282 MpegEncContext * const s = &h->s;
7283 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7286 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7290 tmp= get_ue_golomb(&s->gb);
/* the SPS id must be in range and refer to an allocated SPS */
7291 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7292 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7297 pps->cabac= get_bits1(&s->gb);
7298 pps->pic_order_present= get_bits1(&s->gb);
7299 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7300 if(pps->slice_group_count > 1 ){
7301 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7302 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7303 switch(pps->mb_slice_group_map_type){
7306 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7307 | run_length[ i ] |1 |ue(v) |
7312 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7314 | top_left_mb[ i ] |1 |ue(v) |
7315 | bottom_right_mb[ i ] |1 |ue(v) |
7323 | slice_group_change_direction_flag |1 |u(1) |
7324 | slice_group_change_rate_minus1 |1 |ue(v) |
7329 | slice_group_id_cnt_minus1 |1 |ue(v) |
7330 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7332 | slice_group_id[ i ] |1 |u(v) |
7337 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7338 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* on an out of range count: log and reset both counts to 1 */
7339 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7340 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7341 pps->ref_count[0]= pps->ref_count[1]= 1;
7345 pps->weighted_pred= get_bits1(&s->gb);
7346 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7347 pps->init_qp= get_se_golomb(&s->gb) + 26;
7348 pps->init_qs= get_se_golomb(&s->gb) + 26;
7349 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7350 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7351 pps->constrained_intra_pred= get_bits1(&s->gb);
7352 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
/* defaults used when the optional trailing fields are absent: no 8x8 transform, all scaling factors 16 */
7354 pps->transform_8x8_mode= 0;
7355 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7356 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7357 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* the trailing fields are read only if bits remain before bit_length */
7359 if(get_bits_count(&s->gb) < bit_length){
7360 pps->transform_8x8_mode= get_bits1(&s->gb);
7361 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7362 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7364 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
/* chroma QP table 1 is built on its own only when the two offsets differ, otherwise it is a copy of table 0 */
7367 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7368 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7369 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
/* NOTE(review): this sets the flag in h->pps, not in the pps being parsed - confirm this is intended */
7370 h->pps.chroma_qp_diff= 1;
7372 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7374 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7375 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7376 pps_id, pps->sps_id,
7377 pps->cabac ? "CABAC" : "CAVLC",
7378 pps->slice_group_count,
7379 pps->ref_count[0], pps->ref_count[1],
7380 pps->weighted_pred ? "weighted" : "",
7381 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7382 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7383 pps->constrained_intra_pred ? "CONSTR" : "",
7384 pps->redundant_pic_cnt_present ? "REDU" : "",
7385 pps->transform_8x8_mode ? "8x8DCT" : ""
7393 * Call decode_slice() for each context.
7395 * @param h h264 master context
7396 * @param context_count number of contexts to execute
/*
 * Decodes the slices queued in the first context_count slice contexts.
 *
 * With exactly one context decode_slice() is called directly on h.
 * The statements after that reset the error state of every context from
 * index 1 upwards, hand decode_slice to avctx->execute() over
 * h->thread_context, and finally copy position and field state from the
 * last context and add up the error counts into the master context.
 * NOTE(review): the lines separating the two paths are not visible in this
 * listing; presumably they form an else branch - confirm.
 */
7398 static void execute_decode_slices(H264Context *h, int context_count){
7399 MpegEncContext * const s = &h->s;
7400 AVCodecContext * const avctx= s->avctx;
/* Single context: no dispatch through avctx->execute() is needed. */
7404 if(context_count == 1) {
7405 decode_slice(avctx, h);
/* Index 0 is skipped here; presumably thread_context[0] is h itself - TODO confirm. */
7407 for(i = 1; i < context_count; i++) {
7408 hx = h->thread_context[i];
7409 hx->s.error_resilience = avctx->error_resilience;
7410 hx->s.error_count = 0;
/* Run decode_slice once per entry of h->thread_context. */
7413 avctx->execute(avctx, (void *)decode_slice,
7414 (void **)h->thread_context, NULL, context_count);
7416 /* pull back stuff from slices to master context */
7417 hx = h->thread_context[context_count - 1];
7418 s->mb_x = hx->s.mb_x;
7419 s->mb_y = hx->s.mb_y;
7420 s->dropable = hx->s.dropable;
7421 s->picture_structure = hx->s.picture_structure;
/* Accumulate the error counts of contexts 1..context_count-1 into h. */
7422 for(i = 1; i < context_count; i++)
7423 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Walks buf NAL unit by NAL unit and dispatches every unit on its type.
 *
 * @param h        master decoder context; slice NAL units are parsed into
 *                 h->thread_context[context_count]
 * @param buf      input bytes
 * @param buf_size number of bytes available in buf
 *
 * Each unit is located either through a length prefix of
 * h->nal_length_size bytes or through a 00 00 01 start code, unescaped
 * with decode_nal(), stripped of trailing zero bytes and then handled in
 * the switch on hx->nal_unit_type. Queued slice contexts are executed
 * through execute_decode_slices() once h->max_contexts of them are filled
 * and once more after the loop.
 * The caller in this file compares the result against 0 and passes it on
 * as a byte position; the return statements are not visible in this
 * listing.
 */
7428 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7429 MpegEncContext * const s = &h->s;
7430 AVCodecContext * const avctx= s->avctx;
7432 H264Context *hx; ///< thread context
7433 int context_count = 0;
/* At most one slice context per decoding thread is filled before executing. */
7435 h->max_contexts = avctx->thread_count;
/* Hex dump of the first 50 input bytes.
 * NOTE(review): presumably compiled out by a guard that is not visible
 * here; the loop bound does not take buf_size into account. */
7438 for(i=0; i<50; i++){
7439 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Outside chunked mode every call restarts slice counting; the current
 * picture pointer is cleared unless s->first_field is set. */
7442 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7443 h->current_slice = 0;
7444 if (!s->first_field)
7445 s->current_picture_ptr= NULL;
7457 if(buf_index >= buf_size) break;
/* Length-prefixed framing (presumably the h->is_avc path): assemble the
 * big-endian NAL size from nal_length_size bytes. */
7459 for(i = 0; i < h->nal_length_size; i++)
7460 nalsize = (nalsize << 8) | buf[buf_index++];
/* A size of at most 1, or one reaching past the end of buf, is rejected. */
7461 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7466 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7471 // start code prefix search
7472 for(; buf_index + 3 < buf_size; buf_index++){
7473 // This should always succeed in the first iteration.
7474 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7478 if(buf_index+3 >= buf_size) break;
/* The NAL unit is parsed into the next free slice context. */
7483 hx = h->thread_context[context_count];
7485 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7486 if (ptr==NULL || dst_length < 0){
/* Drop trailing zero bytes. The length is tested first so that an empty
 * payload (dst_length == 0) never reads the byte in front of ptr. */
7489 while(dst_length > 0 && ptr[dst_length - 1] == 0)
/* Payload size in bits without the RBSP trailing bits; 0 for an empty payload. */
7491 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7493 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7494 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* With length prefixes the unescaper is expected to consume the whole unit. */
7497 if (h->is_avc && (nalsize != consumed))
7498 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7500 buf_index += consumed;
/* Condition selecting non-reference NAL units when hurrying or when
 * skip_frame asks for it.
 * NOTE(review): this tests h->nal_ref_idc although the unit was parsed
 * into hx - confirm the two always agree at this point. */
7502 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7503 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7508 switch(hx->nal_unit_type){
/* The master context must itself be on an IDR slice here. */
7510 if (h->nal_unit_type != NAL_IDR_SLICE) {
7511 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
7514 idr(h); //FIXME ensure we do not lose some frames if there is reordering
/* Unpartitioned slice: the slice bit reader also serves as inter reader. */
7516 init_get_bits(&hx->s.gb, ptr, bit_length);
7518 hx->inter_gb_ptr= &hx->s.gb;
7519 hx->s.data_partitioning = 0;
7521 if((err = decode_slice_header(hx, h)))
7524 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
/* Condition under which the slice is kept, given hurry_up and skip_frame. */
7525 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7526 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7527 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7528 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7529 && avctx->skip_frame < AVDISCARD_ALL)
/* Data partitioning: the header is read from this unit, the intra and
 * inter readers are attached by the two assignments that follow. */
7533 init_get_bits(&hx->s.gb, ptr, bit_length);
7535 hx->inter_gb_ptr= NULL;
7536 hx->s.data_partitioning = 1;
7538 err = decode_slice_header(hx, h);
7541 init_get_bits(&hx->intra_gb, ptr, bit_length);
7542 hx->intra_gb_ptr= &hx->intra_gb;
7545 init_get_bits(&hx->inter_gb, ptr, bit_length);
7546 hx->inter_gb_ptr= &hx->inter_gb;
/* Same keep condition as above, additionally requiring an attached intra
 * reader, active data partitioning and an initialized context. */
7548 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7549 && s->context_initialized
7551 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7552 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7553 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7554 && avctx->skip_frame < AVDISCARD_ALL)
7558 init_get_bits(&s->gb, ptr, bit_length);
/* Sequence parameter set, read through the master bit reader. */
7562 init_get_bits(&s->gb, ptr, bit_length);
7563 decode_seq_parameter_set(h);
7565 if(s->flags& CODEC_FLAG_LOW_DELAY)
/* Below 2 the reorder depth follows the low delay setting. */
7568 if(avctx->has_b_frames < 2)
7569 avctx->has_b_frames= !s->low_delay;
/* Picture parameter set; bit_length bounds the parse. */
7572 init_get_bits(&s->gb, ptr, bit_length);
7574 decode_picture_parameter_set(h, bit_length);
7578 case NAL_END_SEQUENCE:
7579 case NAL_END_STREAM:
7580 case NAL_FILLER_DATA:
7582 case NAL_AUXILIARY_SLICE:
/* Log the type of the unit that was just parsed into hx, which is the
 * value the switch dispatched on. */
7585 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
/* All available contexts are filled: decode them now. */
7588 if(context_count == h->max_contexts) {
7589 execute_decode_slices(h, context_count);
7594 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7596 /* Slice could not be decoded in parallel mode, copy down
7597 * NAL unit stuff to context 0 and restart. Note that
7598 * rbsp_buffer is not transferred, but since we no longer
7599 * run in parallel mode this should not be an issue. */
7600 h->nal_unit_type = hx->nal_unit_type;
7601 h->nal_ref_idc = hx->nal_ref_idc;
/* Decode whatever is still queued after the loop. */
7607 execute_decode_slices(h, context_count);
7612 * returns the number of bytes consumed for building the current frame
/*
 * Adjusts the parse position pos before it is reported to the caller.
 *
 * @param s        context supplying flags and parse_context.last_index
 * @param pos      byte position reached while parsing
 * @param buf_size size of the buffer that was parsed
 *
 * NOTE(review): the return statements and the line joining the two groups
 * of adjustments are not visible in this listing; presumably the second
 * group is the branch taken without CODEC_FLAG_TRUNCATED - confirm.
 */
7614 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7615 if(s->flags&CODEC_FLAG_TRUNCATED){
/* Make the position relative to parse_context.last_index, never negative. */
7616 pos -= s->parse_context.last_index;
7617 if(pos<0) pos=0; // FIXME remove (unneeded?)
/* Never report 0; a position within 10 bytes of the end counts as the whole buffer. */
7621 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7622 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Decodes one input buffer and possibly outputs one picture.
 *
 * @param avctx     codec context; priv_data holds the H264Context
 * @param data      output, written as an AVFrame through pict
 * @param data_size set to sizeof(AVFrame) where a picture is output
 * @param buf       input bytes
 * @param buf_size  number of input bytes; 0 selects the path that takes a
 *                  picture from h->delayed_pic
 *
 * The visible steps are: copy the flags, handle the empty buffer case,
 * optionally reassemble frames in truncated mode, parse avcC or raw
 * extradata once, run decode_nal_units() on the buffer, then finish the
 * picture and choose the next picture to output by poc from
 * h->delayed_pic. The final result is get_consumed_bytes() of the
 * position reached by decode_nal_units().
 */
7628 static int decode_frame(AVCodecContext *avctx,
7629 void *data, int *data_size,
7630 const uint8_t *buf, int buf_size)
7632 H264Context *h = avctx->priv_data;
7633 MpegEncContext *s = &h->s;
7634 AVFrame *pict = data;
7637 s->flags= avctx->flags;
7638 s->flags2= avctx->flags2;
7640 /* no supplementary picture */
7641 if (buf_size == 0) {
7645 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest poc, stopping at the first
 * key frame after index 0, then close the gap in the list. */
7646 out = h->delayed_pic[0];
7648 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7649 if(h->delayed_pic[i]->poc < out->poc){
7650 out = h->delayed_pic[i];
7654 for(i=out_idx; h->delayed_pic[i]; i++)
7655 h->delayed_pic[i] = h->delayed_pic[i+1];
7658 *data_size = sizeof(AVFrame);
7659 *pict= *(AVFrame*)out;
/* Truncated input: locate the frame end and let ff_combine_frame() adjust
 * buf and buf_size. */
7665 if(s->flags&CODEC_FLAG_TRUNCATED){
7666 int next= ff_h264_find_frame_end(h, buf, buf_size);
7668 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7670 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
/* One-time parse of the avcC extradata.
 * NOTE(review): apart from the 7 byte minimum below, no check that p and
 * nalsize stay inside extradata is visible in this listing - confirm. */
7673 if(h->is_avc && !h->got_avcC) {
7674 int i, cnt, nalsize;
7675 unsigned char *p = avctx->extradata;
7676 if(avctx->extradata_size < 7) {
7677 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7681 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7684 /* sps and pps in the avcC always have length coded with 2 bytes,
7685 so put a fake nal_length_size = 2 while parsing them */
7686 h->nal_length_size = 2;
7687 // Decode sps from avcC
7688 cnt = *(p+5) & 0x1f; // Number of sps
7690 for (i = 0; i < cnt; i++) {
/* Each entry is a 2 byte big-endian length followed by the NAL unit. */
7691 nalsize = AV_RB16(p) + 2;
7692 if(decode_nal_units(h, p, nalsize) < 0) {
7693 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7698 // Decode pps from avcC
7699 cnt = *(p++); // Number of pps
7700 for (i = 0; i < cnt; i++) {
7701 nalsize = AV_RB16(p) + 2;
/* NOTE(review): the sps loop above fails only on a negative result while
 * this loop requires the result to equal nalsize - confirm which check
 * is intended. */
7702 if(decode_nal_units(h, p, nalsize) != nalsize) {
7703 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7708 // Now store right nal length size, that will be use to parse all other nals
7709 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7710 // Do not reparse avcC
/* Without avcC framing, extradata is parsed as NAL units on the first frame. */
7714 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7715 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7719 buf_index=decode_nal_units(h, buf, buf_size);
/* No picture was started: silent when frames are being skipped, an error otherwise. */
7723 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7724 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7725 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
/* Finish the picture, in chunked mode only once mb_y has reached mb_height. */
7729 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7730 Picture *out = s->current_picture_ptr;
7731 Picture *cur = s->current_picture_ptr;
7732 Picture *prev = h->delayed_output_pic;
7733 int i, pics, cross_idr, out_of_order, out_idx;
7737 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7738 s->current_picture_ptr->pict_type= s->pict_type;
/* Remember frame_num and poc state as the previous values. */
7740 h->prev_frame_num_offset= h->frame_num_offset;
7741 h->prev_frame_num= h->frame_num;
7743 h->prev_poc_msb= h->poc_msb;
7744 h->prev_poc_lsb= h->poc_lsb;
7745 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7749 * FIXME: Error handling code does not seem to support interlaced
7750 * when slices span multiple rows
7751 * The ff_er_add_slice calls don't work right for bottom
7752 * fields; they cause massive erroneous error concealing
7753 * Error marking covers both fields (top and bottom).
7754 * This causes a mismatched s->error_count
7755 * and a bad error table. Further, the error count goes to
7756 * INT_MAX when called for bottom field, because mb_y is
7757 * past end by one (callers fault) and resync_mb_y != 0
7758 * causes problems for the first MB line, too.
7765 if (s->first_field) {
7766 /* Wait for second field. */
7770 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7771 /* Derive top_field_first from field pocs. */
7772 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7774 //FIXME do something with unavailable reference frames
7776 #if 0 //decode order
7777 *data_size = sizeof(AVFrame);
7779 /* Sort B-frames into display order */
/* Raise has_b_frames to the reorder depth signalled in the sps. */
7781 if(h->sps.bitstream_restriction_flag
7782 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7783 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/* Append the current picture to the null-terminated delayed_pic list. */
7788 while(h->delayed_pic[pics]) pics++;
7790 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7792 h->delayed_pic[pics++] = cur;
/* Mark a non-reference picture as held for delayed output. */
7793 if(cur->reference == 0)
7794 cur->reference = DELAYED_PIC_REF;
7797 for(i=0; h->delayed_pic[i]; i++)
7798 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
/* Output candidate: smallest poc before the first key frame after index 0. */
7801 out = h->delayed_pic[0];
7803 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7804 if(h->delayed_pic[i]->poc < out->poc){
7805 out = h->delayed_pic[i];
/* The candidate is out of order when its poc lies before the previously
 * output picture and cross_idr is not set. */
7809 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7810 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7812 else if(prev && pics <= s->avctx->has_b_frames)
7814 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7816 ((!cross_idr && prev && out->poc > prev->poc + 2)
7817 || cur->pict_type == B_TYPE)))
7820 s->avctx->has_b_frames++;
7823 else if(out_of_order)
/* Remove the chosen picture from the delayed list. */
7826 if(out_of_order || pics > s->avctx->has_b_frames){
7827 for(i=out_idx; h->delayed_pic[i]; i++)
7828 h->delayed_pic[i] = h->delayed_pic[i+1];
7834 *data_size = sizeof(AVFrame);
/* Clear the delayed output marker on the previously output picture. */
7835 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7836 prev->reference = 0;
7837 h->delayed_output_pic = out;
7841 *pict= *(AVFrame*)out;
7843 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7847 assert(pict->data[0] || !*data_size);
7848 ff_print_debug_info(s, pict);
7849 //printf("out %d\n", (int)pict->data[0]);
7852 /* Return the Picture timestamp as the frame number */
7853 /* we subtract 1 because it is added on utils.c */
7854 avctx->frame_number = s->picture_number - 1;
7856 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 *
 * A neighbour counts as available when its h->slice_table entry equals
 * h->slice_num. Entries 0, 1 and 2 look one stride back at columns
 * mb_x-1, mb_x and mb_x+1; entry 3 looks at the preceding index.
 * NOTE(review): lines between the visible statements are missing from this
 * listing; presumably the first three entries are guarded by a check on
 * s->mb_y - confirm.
 */
7859 static inline void fill_mb_avail(H264Context *h){
7860 MpegEncContext * const s = &h->s;
/* Linear index of the current macroblock. */
7861 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
/* Row above: the column checks keep the index inside that row. */
7864 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7865 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7866 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
/* Same row, previous column. */
7872 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
/* Entries 4 and 5 are constants. */
7873 h->mb_avail[4]= 1; //FIXME move out
7874 h->mb_avail[5]= 0; //FIXME move out
/*
 * Self-test code: round-trips unsigned and signed exp golomb codes,
 * exercises the 4x4 (I)DCT, the quantizer and the NAL escaping layer and
 * prints mismatches with printf.
 * NOTE(review): the enclosing function header and its preprocessor guard
 * are not visible in this listing.
 */
7882 #define SIZE (COUNT*40)
7888 // int int_temp[10000];
7890 AVCodecContext avctx;
7892 dsputil_init(&dsp, &avctx);
/* Write COUNT unsigned codes into temp, then read them back and compare. */
7894 init_put_bits(&pb, temp, SIZE);
7895 printf("testing unsigned exp golomb\n");
7896 for(i=0; i<COUNT; i++){
7898 set_ue_golomb(&pb, i);
7899 STOP_TIMER("set_ue_golomb");
7901 flush_put_bits(&pb);
7903 init_get_bits(&gb, temp, 8*SIZE);
7904 for(i=0; i<COUNT; i++){
/* Peek at the next 24 bits so a mismatch can be reported with its bit pattern. */
7907 s= show_bits(&gb, 24);
7910 j= get_ue_golomb(&gb);
7912 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7915 STOP_TIMER("get_ue_golomb");
/* Same round trip for signed codes, using values centred on zero. */
7919 init_put_bits(&pb, temp, SIZE);
7920 printf("testing signed exp golomb\n");
7921 for(i=0; i<COUNT; i++){
7923 set_se_golomb(&pb, i - COUNT/2);
7924 STOP_TIMER("set_se_golomb");
7926 flush_put_bits(&pb);
7928 init_get_bits(&gb, temp, 8*SIZE);
7929 for(i=0; i<COUNT; i++){
7932 s= show_bits(&gb, 24);
7935 j= get_se_golomb(&gb);
7936 if(j != i - COUNT/2){
7937 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7940 STOP_TIMER("get_se_golomb");
/* Transform random 4x4 blocks forward and back and accumulate the error. */
7944 printf("testing 4x4 (I)DCT\n");
7947 uint8_t src[16], ref[16];
7948 uint64_t error= 0, max_error=0;
7950 for(i=0; i<COUNT; i++){
7952 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7953 for(j=0; j<16; j++){
7954 ref[j]= random()%255;
7955 src[j]= random()%255;
7958 h264_diff_dct_c(block, src, ref, 4);
/* Rescale the coefficients before the inverse transform; odd columns
 * (j&1) and odd rows (j&4) get an extra factor of about 4/5. */
7961 for(j=0; j<16; j++){
7962 // printf("%d ", block[j]);
7963 block[j]= block[j]*4;
7964 if(j&1) block[j]= (block[j]*4 + 2)/5;
7965 if(j&4) block[j]= (block[j]*4 + 2)/5;
7969 s->dsp.h264_idct_add(ref, block, 4);
7970 /* for(j=0; j<16; j++){
7971 printf("%d ", ref[j]);
7975 for(j=0; j<16; j++){
7976 int diff= FFABS(src[j] - ref[j]);
7979 max_error= FFMAX(max_error, diff);
7982 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7983 printf("testing quantizer\n");
7984 for(qp=0; qp<52; qp++){
7986 src1_block[i]= src2_block[i]= random()%255;
// NAL layer test: random nonzero bytes with some zeros inserted are
// passed through encode_nal() and decode_nal() and compared.
7989 printf("Testing NAL layer\n");
7991 uint8_t bitstream[COUNT];
7992 uint8_t nal[COUNT*2];
7994 memset(&h, 0, sizeof(H264Context));
7996 for(i=0; i<COUNT; i++){
8004 for(j=0; j<COUNT; j++){
8005 bitstream[j]= (random() % 255) + 1;
8008 for(j=0; j<zeros; j++){
8009 int pos= random() % COUNT;
8010 while(bitstream[pos] == 0){
8019 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8021 printf("encoding failed\n");
8025 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
// The decoded payload must have the original length, consume the whole
// encoded unit and match the input byte for byte.
8029 if(out_length != COUNT){
8030 printf("incorrect length %d %d\n", out_length, COUNT);
8034 if(consumed != nal_length){
8035 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8039 if(memcmp(bitstream, out, COUNT)){
8040 printf("mismatch\n");
8046 printf("Testing RBSP\n");
// Releases decoder resources: frees both rbsp_buffer entries with
// av_freep() and calls free_tables() on the H264Context stored in
// avctx->priv_data.
// NOTE(review): further cleanup and the return statement are not visible
// in this listing.
8054 static int decode_end(AVCodecContext *avctx)
8056 H264Context *h = avctx->priv_data;
8057 MpegEncContext *s = &h->s;
// av_freep() takes the address of each pointer.
8059 av_freep(&h->rbsp_buffer[0]);
8060 av_freep(&h->rbsp_buffer[1]);
8061 free_tables(h); //FIXME cleanup init stuff perhaps
8064 // memset(h, 0, sizeof(H264Context));
8070 AVCodec h264_decoder = {
8074 sizeof(H264Context),
8079 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,