/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4

/* CAVLC tables (initialized elsewhere in this file) */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];

/* forward declarations for SVQ3 helpers and the deblocking filter */
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
/**
 * Packs two 16-bit values into one 32-bit word in memory order: after the
 * result is stored, the lower-addressed half holds 'a' and the
 * higher-addressed half holds 'b', on either endianness.
 * (The visible fragment was missing the #else/#endif; restored here.)
 * @param a value placed in the low-addressed 16 bits
 * @param b value placed in the high-addressed 16 bits
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + (a<<16);
#else
   return (a&0xFFFF) + (b<<16);
#endif
}
/**
 * Lookup table: ff_rem6[q] == q % 6 for q in 0..51 (the H.264 QP range),
 * avoiding a runtime modulo. (Restored the missing closing brace.)
 */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * Lookup table: ff_div6[q] == q / 6 for q in 0..51 (the H.264 QP range),
 * avoiding a runtime division. (Restored the missing closing brace.)
 */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Fills a small rectangle with a constant value using the widest aligned
 * stores available. The visible fragment had its branch scaffolding
 * stripped; restored here to match the writes that were still present.
 * @param vp destination, must be aligned to min(row width, STRIDE_ALIGN)
 * @param w width of the rectangle in elements, should be a constant (<=4)
 * @param h height of the rectangle, should be a constant
 * @param stride row stride in elements
 * @param val the fill value
 * @param size the size of val (1 or 4), should be a constant
 */
static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);
    assert(w<=4);

    /* convert element counts to bytes */
    w      *= size;
    stride *= size;

    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    assert((stride&(w-1))==0);
    if(w==2){
        const uint16_t v= size==4 ? val : val*0x0101;
        *(uint16_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint16_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint16_t*)(p + 2*stride)= v;
        *(uint16_t*)(p + 3*stride)= v;
    }else if(w==4){
        const uint32_t v= size==4 ? val : val*0x01010101;
        *(uint32_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint32_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint32_t*)(p + 2*stride)= v;
        *(uint32_t*)(p + 3*stride)= v;
    }else if(w==8){
    //gcc can't optimize 64bit math on x86_32
#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 2*stride)= v;
        *(uint64_t*)(p + 3*stride)= v;
    }else if(w==16){
        const uint64_t v= val*0x0100000001ULL;
        *(uint64_t*)(p + 0+0*stride)= v;
        *(uint64_t*)(p + 8+0*stride)= v;
        if(h==1) return;
        *(uint64_t*)(p + 0+1*stride)= v;
        *(uint64_t*)(p + 8+1*stride)= v;
        if(h==2) return;
        *(uint64_t*)(p + 0+2*stride)= v;
        *(uint64_t*)(p + 8+2*stride)= v;
        *(uint64_t*)(p + 0+3*stride)= v;
        *(uint64_t*)(p + 8+3*stride)= v;
#else
        /* 32-bit word fallback for hosts without fast 64-bit stores */
        *(uint32_t*)(p + 0+0*stride)= val;
        *(uint32_t*)(p + 4+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)= val;
        *(uint32_t*)(p + 4+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)= val;
        *(uint32_t*)(p + 4+2*stride)= val;
        *(uint32_t*)(p + 0+3*stride)= val;
        *(uint32_t*)(p + 4+3*stride)= val;
    }else if(w==16){
        *(uint32_t*)(p + 0+0*stride)= val;
        *(uint32_t*)(p + 4+0*stride)= val;
        *(uint32_t*)(p + 8+0*stride)= val;
        *(uint32_t*)(p +12+0*stride)= val;
        if(h==1) return;
        *(uint32_t*)(p + 0+1*stride)= val;
        *(uint32_t*)(p + 4+1*stride)= val;
        *(uint32_t*)(p + 8+1*stride)= val;
        *(uint32_t*)(p +12+1*stride)= val;
        if(h==2) return;
        *(uint32_t*)(p + 0+2*stride)= val;
        *(uint32_t*)(p + 4+2*stride)= val;
        *(uint32_t*)(p + 8+2*stride)= val;
        *(uint32_t*)(p +12+2*stride)= val;
        *(uint32_t*)(p + 0+3*stride)= val;
        *(uint32_t*)(p + 4+3*stride)= val;
        *(uint32_t*)(p + 8+3*stride)= val;
        *(uint32_t*)(p +12+3*stride)= val;
#endif
    }else
        assert(0);
}
/**
 * Loads the neighbour-derived per-macroblock caches (intra4x4 prediction
 * modes, sample-availability masks, non-zero counts, CBP, motion vectors,
 * reference indices, mvd, direct flags) for macroblock (s->mb_x, s->mb_y).
 * @param mb_type macroblock type of the current macroblock
 * @param for_deblock nonzero when filling for the deblocking filter path
 * NOTE(review): many interior lines of this function are missing from this
 * chunk; the code below is annotated as-is and is not complete on its own.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)

    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it

    /* default neighbour macroblock indices (non-MBAFF layout) */
    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;

        /* MBAFF: neighbour indices depend on frame/field coding of each
         * macroblock pair */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
            top_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
            topleft_xy -= s->mb_stride;
            ? !curr_mb_frame_flag // bottom macroblock
            : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
            topright_xy -= s->mb_stride;
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_frame_flag) {
                left_xy[1] += s->mb_stride;

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];

        /* deblocking: any already-decoded neighbour (slice_table < 255)
         * counts as available */
        top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
            /* reload packed luma nnz flags and this MB's own mv/ref data */
            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
                h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
            for(list=0; list<h->list_count; list++){
                if(USES_LIST(mb_type,list)){
                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);

        /* normal decoding: neighbours only count if in the same slice */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        /* sample-availability bitmasks consumed by the intra predictors */
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            /* import the neighbours' bottom-row / right-column 4x4 modes */
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;

//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    /* non-zero coefficient counts from the top neighbour */
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

        /* top neighbour unavailable: CABAC assumes 0, CAVLC assumes 64 */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    /* non-zero coefficient counts from the left neighbour(s) */
    for (i=0; i<2; i++) {
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

        /* neighbour coded block patterns (CABAC context derivation) */
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
            h->mv_cache_clean[list]= 0;

            /* top row of the mv / ref caches */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;

                /* left column of the mv / ref caches */
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)

            /* topleft corner of the mv / ref caches */
            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            /* topright corner of the mv / ref caches */
            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type == B_TYPE){
                    /* direct-mode flags of the neighbours (CABAC) */
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;

            /* MBAFF: rescale cached MVs/refs at frame<->field boundaries */
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])

                /* current MB is a field MB: halve vertical components of
                 * frame-coded neighbours */
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\

                /* current MB is a frame MB: double vertical components of
                 * field-coded neighbours */
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\

    /* number of neighbours using the 8x8 transform (CABAC context) */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
629 static inline void write_back_intra_pred_mode(H264Context *h){
630 MpegEncContext * const s = &h->s;
631 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
633 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
634 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
635 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
636 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
637 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
638 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
639 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
643 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
645 static inline int check_intra4x4_pred_mode(H264Context *h){
646 MpegEncContext * const s = &h->s;
647 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
648 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
651 if(!(h->top_samples_available&0x8000)){
653 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
655 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
658 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
663 if(!(h->left_samples_available&0x8000)){
665 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
667 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
670 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
676 } //FIXME cleanup like next
679 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
681 static inline int check_intra_pred_mode(H264Context *h, int mode){
682 MpegEncContext * const s = &h->s;
683 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
684 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
687 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
691 if(!(h->top_samples_available&0x8000)){
694 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
699 if(!(h->left_samples_available&0x8000)){
702 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
711 * gets the predicted intra4x4 prediction mode.
713 static inline int pred_intra_mode(H264Context *h, int n){
714 const int index8= scan8[n];
715 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
716 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
717 const int min= FFMIN(left, top);
719 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
721 if(min<0) return DC_PRED;
725 static inline void write_back_non_zero_count(H264Context *h){
726 MpegEncContext * const s = &h->s;
727 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
729 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
730 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
731 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
732 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
733 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
734 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
735 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
737 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
738 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
739 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
741 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
742 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
743 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
746 // store all luma nnzs, for deblocking
749 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
750 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
755 * gets the predicted number of non zero coefficients.
756 * @param n block index
758 static inline int pred_non_zero_count(H264Context *h, int n){
759 const int index8= scan8[n];
760 const int left= h->non_zero_count_cache[index8 - 1];
761 const int top = h->non_zero_count_cache[index8 - 8];
764 if(i<64) i= (i+1)>>1;
766 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Selects the "diagonal" MV predictor for block i: the top-right neighbour
 * if available, otherwise the top-left; sets *C to its MV and returns its
 * reference index. Contains special handling for MBAFF frame/field
 * neighbour mismatches.
 * NOTE(review): interior lines of this function are missing from this
 * chunk; the code below is annotated as-is and is not complete on its own.
 * @param part_width the width of the partition in 4x4 block units
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        /* scan8[0]-2 is used as scratch space for the rescaled MV */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
/* Fetches the MV at 4x4-block coordinates (X4,Y4) from the current picture,
 * rescales its vertical component by MV_OP and its reference by REF_OP. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);

        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];

        /* fall back to the top-left neighbour */
        tprintf(s->avctx, "topright MV not available\n");

        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
829 * gets the predicted MV.
830 * @param n the block index
831 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
832 * @param mx the x component of the predicted motion vector
833 * @param my the y component of the predicted motion vector
835 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
836 const int index8= scan8[n];
837 const int top_ref= h->ref_cache[list][ index8 - 8 ];
838 const int left_ref= h->ref_cache[list][ index8 - 1 ];
839 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
840 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
842 int diagonal_ref, match_count;
844 assert(part_width==1 || part_width==2 || part_width==4);
854 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
855 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
856 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
857 if(match_count > 1){ //most common
858 *mx= mid_pred(A[0], B[0], C[0]);
859 *my= mid_pred(A[1], B[1], C[1]);
860 }else if(match_count==1){
864 }else if(top_ref==ref){
872 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
876 *mx= mid_pred(A[0], B[0], C[0]);
877 *my= mid_pred(A[1], B[1], C[1]);
881 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
885 * gets the directionally predicted 16x8 MV.
886 * @param n the block index
887 * @param mx the x component of the predicted motion vector
888 * @param my the y component of the predicted motion vector
890 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
892 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
893 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
895 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
903 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
904 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
906 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
916 pred_motion(h, n, 4, list, ref, mx, my);
920 * gets the directionally predicted 8x16 MV.
921 * @param n the block index
922 * @param mx the x component of the predicted motion vector
923 * @param my the y component of the predicted motion vector
925 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
927 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
928 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
930 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
941 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
943 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
945 if(diagonal_ref == ref){
953 pred_motion(h, n, 2, list, ref, mx, my);
956 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
957 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
958 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
960 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
962 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
963 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
964 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
970 pred_motion(h, 0, 4, 0, 0, mx, my);
975 static inline void direct_dist_scale_factor(H264Context * const h){
976 const int poc = h->s.current_picture_ptr->poc;
977 const int poc1 = h->ref_list[1][0].poc;
979 for(i=0; i<h->ref_count[0]; i++){
980 int poc0 = h->ref_list[0][i].poc;
981 int td = av_clip(poc1 - poc0, -128, 127);
982 if(td == 0 /* FIXME || pic0 is a long-term ref */){
983 h->dist_scale_factor[i] = 256;
985 int tb = av_clip(poc - poc0, -128, 127);
986 int tx = (16384 + (FFABS(td) >> 1)) / td;
987 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
991 for(i=0; i<h->ref_count[0]; i++){
992 h->dist_scale_factor_field[2*i] =
993 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Initializes reference bookkeeping needed for B-frame direct prediction:
 * stores the current picture's per-list reference counts/POCs and builds
 * map_col_to_list0[], which maps each reference of the colocated picture
 * (first list-1 ref) to the current picture's list-0 index with the same POC.
 * NOTE(review): declarations of list/i/j, a break after the match, and the
 * condition guarding the field-mapping loop are elided from this excerpt.
 */
997 static inline void direct_ref_list_init(H264Context * const h){
998     MpegEncContext * const s = &h->s;
999     Picture * const ref1 = &h->ref_list[1][0];
1000     Picture * const cur = s->current_picture_ptr;
// I pictures reference nothing; only B pictures have a second list.
1002     if(cur->pict_type == I_TYPE)
1003         cur->ref_count[0] = 0;
1004     if(cur->pict_type != B_TYPE)
1005         cur->ref_count[1] = 0;
1006     for(list=0; list<2; list++){
1007         cur->ref_count[list] = h->ref_count[list];
1008         for(j=0; j<h->ref_count[list]; j++)
1009             cur->ref_poc[list][j] = h->ref_list[list][j].poc;
// Spatial direct (and non-B slices) do not need the colocated-ref mapping.
1011     if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1013     for(list=0; list<2; list++){
1014         for(i=0; i<ref1->ref_count[list]; i++){
1015             const int poc = ref1->ref_poc[list][i];
// Default 0 stands in for references that are no longer in list 0.
1016             h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1017             for(j=0; j<h->ref_count[list]; j++)
1018                 if(h->ref_list[list][j].poc == poc){
1019                     h->map_col_to_list0[list][i] = j;
// Field variant: frame index j maps to the two field indices 2j / 2j+1.
1025         for(list=0; list<2; list++){
1026             for(i=0; i<ref1->ref_count[list]; i++){
1027                 j = h->map_col_to_list0[list][i];
1028                 h->map_col_to_list0_field[list][2*i] = 2*j;
1029                 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Fills mv_cache/ref_cache for a B_Direct macroblock (or the direct 8x8
 * sub-blocks of a B_8x8 macroblock) and refines *mb_type accordingly.
 * Implements both spatial and temporal direct prediction using the colocated
 * macroblock of the first list-1 reference.
 * NOTE(review): this excerpt has many elided lines (loop-variable and local
 * declarations, else-branches, closing braces); comments below describe only
 * the visible statements.
 */
1035 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1036     MpegEncContext * const s = &h->s;
1037     const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1038     const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1039     const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
// Colocated macroblock type and its list-0/1 motion vectors and ref indices.
1040     const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1041     const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1042     const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1043     const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1044     const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1045     const int is_b8x8 = IS_8X8(*mb_type);
1046     unsigned int sub_mb_type;
1049 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Choose partition size from the colocated MB type (and the SPS 8x8
// inference flag), per the spec's direct-mode partition derivation.
1050     if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1051         /* FIXME save sub mb types from previous frames (or derive from MVs)
1052          * so we know exactly what block size to use */
1053         sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1054         *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1055     }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1056         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1057         *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1059         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1060         *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1063         *mb_type |= MB_TYPE_DIRECT2;
1065         *mb_type |= MB_TYPE_INTERLACED;
1067     tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- Spatial direct prediction ----
1069     if(h->direct_spatial_mv_pred){
1074         /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1076         /* ref = min(neighbors) */
// Per list: the reference index is the minimum valid index among the
// left (A), top (B) and top-right (C) neighbours.
1077         for(list=0; list<2; list++){
1078             int refa = h->ref_cache[list][scan8[0] - 1];
1079             int refb = h->ref_cache[list][scan8[0] - 8];
1080             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
// C unavailable: fall back to the top-left neighbour (D), as in MV prediction.
1082                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1084             if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1086             if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
// No valid neighbour reference in either list: direct MVs are zero on ref 0.
1092         if(ref[0] < 0 && ref[1] < 0){
1093             ref[0] = ref[1] = 0;
1094             mv[0][0] = mv[0][1] =
1095             mv[1][0] = mv[1][1] = 0;
1097             for(list=0; list<2; list++){
// Valid reference: use the regular 16x16 median prediction for this list.
1099                     pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1101                     mv[list][0] = mv[list][1] = 0;
// A list with no reference contributes no prediction: strip its P0 flags.
1106             *mb_type &= ~MB_TYPE_P0L1;
1107             sub_mb_type &= ~MB_TYPE_P0L1;
1108         }else if(ref[0] < 0){
1109             *mb_type &= ~MB_TYPE_P0L0;
1110             sub_mb_type &= ~MB_TYPE_P0L0;
// 16x16 direct: one fill per cache. The colocated-zero-MV test below zeroes
// the prediction when the colocated block is (nearly) static; the
// h->x264_build>33 condition works around old x264 encoder behaviour.
1113         if(IS_16X16(*mb_type)){
1116             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1117             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1118             if(!IS_INTRA(mb_type_col)
1119                && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1120                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1121                        && (h->x264_build>33 || !h->x264_build)))){
1123                     a= pack16to32(mv[0][0],mv[0][1]);
1125                     b= pack16to32(mv[1][0],mv[1][1]);
1127                 a= pack16to32(mv[0][0],mv[0][1]);
1128                 b= pack16to32(mv[1][0],mv[1][1]);
1130             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1131             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// 8x8 path: handle each direct sub-block independently.
1133             for(i8=0; i8<4; i8++){
1134                 const int x8 = i8&1;
1135                 const int y8 = i8>>1;
// In B_8x8 macroblocks, only sub-blocks flagged direct are predicted here.
1137                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1139                 h->sub_mb_type[i8] = sub_mb_type;
1141                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1142                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1143                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1144                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Colocated-static test per 8x8 block, same x264 workaround as above.
1147                 if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
1148                                                 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1149                                                     && (h->x264_build>33 || !h->x264_build)))){
1150                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1151                     if(IS_SUB_8X8(sub_mb_type)){
// One MV per 8x8 block: sample the colocated MV at the block's last 4x4 unit.
1152                         const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1153                         if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1155                                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1157                                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1160                         for(i4=0; i4<4; i4++){
1161                             const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1162                             if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1164                                     *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1166                                     *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ---- Temporal direct prediction ----
1172     }else{ /* direct temporal mv pred */
1173         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1174         const int *dist_scale_factor = h->dist_scale_factor;
// Interlaced macroblocks use the field variants of the mapping tables.
1177         if(IS_INTERLACED(*mb_type)){
1178             map_col_to_list0[0] = h->map_col_to_list0_field[0];
1179             map_col_to_list0[1] = h->map_col_to_list0_field[1];
1180             dist_scale_factor = h->dist_scale_factor_field;
// Mixed frame/field between current MB and colocated MB needs rescaling of
// the colocated data (MBAFF cross-parity cases).
1182         if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1183             /* FIXME assumes direct_8x8_inference == 1 */
1184             const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1185             int mb_types_col[2];
1188             *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1189                      | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1190                      | (*mb_type & MB_TYPE_INTERLACED);
1191             sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1193             if(IS_INTERLACED(*mb_type)){
1194                 /* frame to field scaling */
1195                 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1196                 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
// Rewind the colocated pointers to the top of the MB pair.
1198                     l1ref0 -= 2*h->b8_stride;
1199                     l1ref1 -= 2*h->b8_stride;
1200                     l1mv0  -= 4*h->b_stride;
1201                     l1mv1  -= 4*h->b_stride;
1205                 if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1206                    && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1208                     *mb_type   |= MB_TYPE_16x8;
1210                     *mb_type   |= MB_TYPE_8x8;
1212                 /* field to frame scaling */
1213                 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1214                  * but in MBAFF, top and bottom POC are equal */
1215                 int dy = (s->mb_y&1) ? 1 : 2;
1217                 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1218                 l1ref0 += dy*h->b8_stride;
1219                 l1ref1 += dy*h->b8_stride;
1220                 l1mv0  += 2*dy*h->b_stride;
1221                 l1mv1  += 2*dy*h->b_stride;
1224                 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1226                     *mb_type   |= MB_TYPE_16x16;
1228                     *mb_type   |= MB_TYPE_8x8;
// Cross-parity 8x8 loop: list-1 ref is always 0; list-0 ref and MVs are
// derived from the (y-shifted) colocated data and the distance scale factor.
1231             for(i8=0; i8<4; i8++){
1232                 const int x8 = i8&1;
1233                 const int y8 = i8>>1;
1235                 const int16_t (*l1mv)[2]= l1mv0;
1237                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1239                 h->sub_mb_type[i8] = sub_mb_type;
1241                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra colocated block: direct prediction degenerates to zero MV on ref 0.
1242                 if(IS_INTRA(mb_types_col[y8])){
1243                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1244                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1245                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1249                 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1251                     ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1253                     ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1256                 scale = dist_scale_factor[ref0];
1257                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
// Temporal scaling: mv_l0 = (scale*mv_col + 128) >> 8, mv_l1 = mv_l0 - mv_col.
1260                     const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1261                     int my_col = (mv_col[1]<<y_shift)/2;
1262                     int mx = (scale * mv_col[0] + 128) >> 8;
1263                     int my = (scale * my_col + 128) >> 8;
1264                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1265                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1272         /* one-to-one mv scaling */
// Same-parity case. 16x16: single colocated MV, one pair of cache fills.
1274         if(IS_16X16(*mb_type)){
1277             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1278             if(IS_INTRA(mb_type_col)){
1281                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1282                                                 : map_col_to_list0[1][l1ref1[0]];
1283                 const int scale = dist_scale_factor[ref0];
1284                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1286                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1287                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1289                 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1290                 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1292             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1293             fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1294             fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// 8x8 path: per-block reference mapping and MV scaling.
1296             for(i8=0; i8<4; i8++){
1297                 const int x8 = i8&1;
1298                 const int y8 = i8>>1;
1300                 const int16_t (*l1mv)[2]= l1mv0;
1302                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1304                 h->sub_mb_type[i8] = sub_mb_type;
1305                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1306                 if(IS_INTRA(mb_type_col)){
1307                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1308                     fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1309                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1313                 ref0 = l1ref0[x8 + y8*h->b8_stride];
1315                     ref0 = map_col_to_list0[0][ref0];
1317                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1320                 scale = dist_scale_factor[ref0];
1322                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1323                 if(IS_SUB_8X8(sub_mb_type)){
1324                     const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1325                     int mx = (scale * mv_col[0] + 128) >> 8;
1326                     int my = (scale * mv_col[1] + 128) >> 8;
1327                     fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1328                     fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
// 4x4 granularity: scale each colocated 4x4 MV individually.
1330                     for(i4=0; i4<4; i4++){
1331                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1332                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1333                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1334                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1335                         *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1336                             pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion data (MVs, reference indices, and — for
 * CABAC — MV deltas and direct flags) from the decoding caches back into the
 * frame-wide tables of the current picture.
 * NOTE(review): this excerpt is missing lines (loop headers for y, parts of
 * the skip/else paths, closing braces); comments cover visible code only.
 */
1343 static inline void write_back_motion(H264Context *h, int mb_type){
1344     MpegEncContext * const s = &h->s;
1345     const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1346     const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// Mark list 0 unused for this MB when the MB type does not reference it.
1349         if(!USES_LIST(mb_type, 0))
1350             fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1352     for(list=0; list<h->list_count; list++){
1354         if(!USES_LIST(mb_type, list))
// Copy 4 MVs per row via two 64-bit stores (2 MVs = 2x int16[2] = 8 bytes each).
1358             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1359             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the MV deltas; skipped MBs have all-zero deltas.
1361         if( h->pps.cabac ) {
1362             if(IS_SKIP(mb_type))
1363                 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1366                     *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1367                     *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One reference index per 8x8 block, taken from the cache corners.
1372             int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1373             ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1374             ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1375             ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1376             ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B slices with CABAC track which 8x8 sub-blocks used direct prediction.
1380     if(h->slice_type == B_TYPE && h->pps.cabac){
1381         if(IS_8X8(mb_type)){
1382             uint8_t *direct_table = &h->direct_table[b8_xy];
1383             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1384             direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1385             direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1391 * Decodes a network abstraction layer unit.
1392 * @param consumed is the number of bytes used as input
1393 * @param length is the length of the array
1394  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1395 * @returns decoded bytes, might be src+1 if no escapes
/**
 * Decodes a NAL unit: parses the header byte (nal_ref_idc / nal_unit_type)
 * and removes 00 00 03 emulation-prevention bytes, returning a pointer to the
 * unescaped RBSP (src+1 itself when no escapes are present).
 * NOTE(review): several lines are elided from this excerpt (declarations of
 * i/si/di/dst/bufidx, the return statements, loop bodies); comments describe
 * the visible statements only.
 */
1397 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
// NAL header byte: bit 7 forbidden_zero_bit, bits 5-6 nal_ref_idc,
// bits 0-4 nal_unit_type.
1402 //    src[0]&0x80;                //forbidden bit
1403     h->nal_ref_idc= src[0]>>5;
1404     h->nal_unit_type= src[0]&0x1F;
1408     for(i=0; i<length; i++)
1409         printf("%2X ", src[i]);
// First pass: scan for a 00 00 0x (x<=3) pattern, i.e. an escape or a start code.
1411     for(i=0; i+1<length; i+=2){
1412         if(src[i]) continue;
1413         if(i>0 && src[i-1]==0) i--;
1414         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1416                 /* startcode, so we must be past the end */
// No escaped zero found: the payload can be used in place, no copy needed.
1423     if(i>=length-1){ //no escaped 0
1424         *dst_length= length;
1425         *consumed= length+1; //+1 for the header
// Partition C slices get a separate RBSP buffer so partitions A/B stay valid.
1429     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1430     h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1431     dst= h->rbsp_buffer[bufidx];
1437 //printf("decoding esc\n");
// Second pass: copy the payload, dropping the 0x03 emulation-prevention bytes.
1440         //remove escapes (very rare 1:2^22)
1441         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1442             if(src[si+2]==3){ //escape
1447             }else //next start code
1451         dst[di++]= src[si++];
1455     *consumed= si + 1;//+1 for the header
1456 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1461 * identifies the exact end of the bitstream
1462 * @return the length of the trailing, or 0 if damaged
// Locates the rbsp_stop_one_bit to find the exact end of the bitstream.
// NOTE(review): the body computing v and the return paths are elided from
// this excerpt; only the declaration and trace output are visible.
1464 static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
1468     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1478  * IDCT transforms the 16 DC values and dequantizes them.
1479 * @param qp quantization parameter
/**
 * Inverse 4x4 Hadamard transform + dequantization of the 16 luma DC
 * coefficients, writing the results back to the DC positions of the 16
 * 4x4 blocks inside `block` (hence the stride-based offset tables).
 * NOTE(review): the loop headers, the stores of the vertical pass into
 * temp[], and `stride` are elided from this excerpt.
 */
1481 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1484     int temp[16]; //FIXME check if this is a good idea
// x/y offsets of the DC positions of the 16 4x4 blocks within the MB layout.
1485     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1486     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1488 //memset(block, 64, 2*256);
// Vertical pass: butterfly over each column of DC values.
1491         const int offset= y_offset[i];
1492         const int z0= block[offset+stride*0] + block[offset+stride*4];
1493         const int z1= block[offset+stride*0] - block[offset+stride*4];
1494         const int z2= block[offset+stride*1] - block[offset+stride*5];
1495         const int z3= block[offset+stride*1] + block[offset+stride*5];
// Horizontal pass: butterfly over temp[], then dequantize with rounding
// ((x*qmul + 128) >> 8) and store back to the DC positions.
1504         const int offset= x_offset[i];
1505         const int z0= temp[4*0+i] + temp[4*2+i];
1506         const int z1= temp[4*0+i] - temp[4*2+i];
1507         const int z2= temp[4*1+i] - temp[4*3+i];
1508         const int z3= temp[4*1+i] + temp[4*3+i];
1510         block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1511         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1512         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1513         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1519  * DCT transforms the 16 DC values.
1520 * @param qp quantization parameter ??? FIXME
/**
 * Forward 4x4 Hadamard transform of the 16 luma DC coefficients (encoder
 * counterpart of h264_luma_dc_dequant_idct_c); results are halved (>>1)
 * rather than quantized here.
 * NOTE(review): loop headers, the temp[] stores of the vertical pass, and
 * `stride` are elided from this excerpt.
 */
1522 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1523 //    const int qmul= dequant_coeff[qp][0];
1525     int temp[16]; //FIXME check if this is a good idea
// Same DC-position offset tables as the inverse transform.
1526     static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1527     static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Vertical butterfly pass.
1530         const int offset= y_offset[i];
1531         const int z0= block[offset+stride*0] + block[offset+stride*4];
1532         const int z1= block[offset+stride*0] - block[offset+stride*4];
1533         const int z2= block[offset+stride*1] - block[offset+stride*5];
1534         const int z3= block[offset+stride*1] + block[offset+stride*5];
// Horizontal butterfly pass; the >>1 keeps the transform's overall scaling.
1543         const int offset= x_offset[i];
1544         const int z0= temp[4*0+i] + temp[4*2+i];
1545         const int z1= temp[4*0+i] - temp[4*2+i];
1546         const int z2= temp[4*1+i] - temp[4*3+i];
1547         const int z3= temp[4*1+i] + temp[4*3+i];
1549         block[stride*0 +offset]= (z0 + z3)>>1;
1550         block[stride*2 +offset]= (z1 + z2)>>1;
1551         block[stride*8 +offset]= (z1 - z2)>>1;
1552         block[stride*10+offset]= (z0 - z3)>>1;
/**
 * Inverse 2x2 Hadamard transform + dequantization of the 4 chroma DC values,
 * stored at the DC positions of the four 4x4 chroma blocks.
 * NOTE(review): the lines computing e and f (presumably e=a+b, f=c+d or
 * similar butterfly sums — confirm against the full source) are elided.
 */
1560 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1561     const int stride= 16*2;
1562     const int xStride= 16;
1565     a= block[stride*0 + xStride*0];
1566     b= block[stride*0 + xStride*1];
1567     c= block[stride*1 + xStride*0];
1568     d= block[stride*1 + xStride*1];
// Butterfly outputs, dequantized with a >>7 scaling.
1575     block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1576     block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1577     block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1578     block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 Hadamard transform of the 4 chroma DC values (encoder
 * counterpart of chroma_dc_dequant_idct_c); no quantization here.
 * NOTE(review): the lines computing e (and possibly other intermediates)
 * are elided from this excerpt.
 */
1582 static void chroma_dc_dct_c(DCTELEM *block){
1583     const int stride= 16*2;
1584     const int xStride= 16;
1587     a= block[stride*0 + xStride*0];
1588     b= block[stride*0 + xStride*1];
1589     c= block[stride*1 + xStride*0];
1590     d= block[stride*1 + xStride*1];
// Butterfly outputs written back to the same four DC positions.
1597     block[stride*0 + xStride*0]= (a+c);
1598     block[stride*0 + xStride*1]= (e+b);
1599     block[stride*1 + xStride*0]= (a-c);
1600     block[stride*1 + xStride*1]= (e-b);
1605 * gets the chroma qp.
1607 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1608 return h->pps.chroma_qp_table[t][qscale & 0xff];
1611 //FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
1612 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a block of DCT coefficients in the given scan order and returns
 * the index of the last nonzero coefficient. The DC coefficient gets a
 * different shift when separate_dc is set (separate intra16x16/chroma DC
 * handling); bias differs between intra (1/3) and inter (1/6) rounding.
 * NOTE(review): several lines are elided (declaration of last_non_zero and
 * the loop header over i, sign handling, zero-store paths); the threshold
 * compare is the usual |level| > threshold1 trick done in unsigned math.
 */
1613 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1615     const int * const quant_table= quant_coeff[qscale];
1616     const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1617     const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1618     const unsigned int threshold2= (threshold1<<1);
// DC handled separately: note the different shifts (QUANT_SHIFT-2 here,
// QUANT_SHIFT+1 below) depending on which DC table applies.
1624             const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1625             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1626             const unsigned int dc_threshold2= (dc_threshold1<<1);
1628             int level= block[0]*quant_coeff[qscale+18][0];
1629             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631                     level= (dc_bias + level)>>(QUANT_SHIFT-2);
1634                     level= (dc_bias - level)>>(QUANT_SHIFT-2);
1637 //                    last_non_zero = i;
1642             const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1643             const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1644             const unsigned int dc_threshold2= (dc_threshold1<<1);
1646             int level= block[0]*quant_table[0];
1647             if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1649                     level= (dc_bias + level)>>(QUANT_SHIFT+1);
1652                     level= (dc_bias - level)>>(QUANT_SHIFT+1);
1655 //                    last_non_zero = i;
// AC coefficients, visited in scan order.
1668         const int j= scantable[i];
1669         int level= block[j]*quant_table[j];
1671 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1672 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1673         if(((unsigned)(level+threshold1))>threshold2){
1675                 level= (bias + level)>>QUANT_SHIFT;
1678                 level= (bias - level)>>QUANT_SHIFT;
1687     return last_non_zero;
/**
 * Motion compensation for one partition in one direction (one reference
 * list): fetches luma with quarter-pel and chroma with eighth-pel accuracy,
 * using ff_emulated_edge_mc when the MV points (partly) outside the picture.
 * NOTE(review): lines are elided from this excerpt (declaration of emu,
 * return on missing reference, some emu-check branches, closing braces).
 */
1690 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1691                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1692                            int src_x_offset, int src_y_offset,
1693                            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1694     MpegEncContext * const s = &h->s;
// MVs are in quarter-pel units; offsets are converted to the same scale (*8).
1695     const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1696     int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
// Sub-pel phase selects one of the 16 qpel interpolation functions.
1697     const int luma_xy= (mx&3) + ((my&3)<<2);
1698     uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1699     uint8_t * src_cb, * src_cr;
1700     int extra_width= h->emu_edge_width;
1701     int extra_height= h->emu_edge_height;
1703     const int full_mx= mx>>2;
1704     const int full_my= my>>2;
1705     const int pic_width  = 16*s->mb_width;
1706     const int pic_height = 16*s->mb_height >> (MB_MBAFF || FIELD_PICTURE);
1708     if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
// Sub-pel interpolation reads 3 extra pixels around the block.
1711     if(mx&7) extra_width -= 3;
1712     if(my&7) extra_height -= 3;
// Out-of-picture reference: copy via the edge-emulation buffer first.
1714     if(   full_mx < 0-extra_width
1715        || full_my < 0-extra_height
1716        || full_mx + 16/*FIXME*/ > pic_width + extra_width
1717        || full_my + 16/*FIXME*/ > pic_height + extra_height){
1718         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1719             src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1723     qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
// Non-square partitions do a second luma op shifted by delta.
1725         qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1728     if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1730     if(MB_MBAFF || FIELD_PICTURE){
1731         // chroma offset when predicting from a field of opposite parity
1732         my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
1733         emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
// Chroma is at half resolution: MVs shifted >>3, remainder mx&7/my&7 fed
// to the chroma interpolator.
1735     src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1736     src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1739         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1740             src_cb= s->edge_emu_buffer;
1742     chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1745         ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1746             src_cr= s->edge_emu_buffer;
1748     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Standard (non-weighted) motion compensation for one partition: predicts
 * from list 0 with the "put" functions, then — if bidirectional — averages
 * in the list-1 prediction with the "avg" functions.
 * NOTE(review): the conditionals guarding the two mc_dir_part calls and the
 * qpix_op reassignment are partly elided from this excerpt.
 */
1751 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1752                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1753                            int x_offset, int y_offset,
1754                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1755                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1756                            int list0, int list1){
1757     MpegEncContext * const s = &h->s;
1758     qpel_mc_func *qpix_op=  qpix_put;
1759     h264_chroma_mc_func chroma_op= chroma_put;
// Advance the destination pointers to this partition and convert the
// partition offsets to absolute picture coordinates.
1761     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1762     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1763     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1764     x_offset += 8*s->mb_x;
1765     y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
// List-0 prediction with "put"...
1768         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1769         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1770                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1771                    qpix_op, chroma_op);
// ...then switch to "avg" so a following list-1 prediction blends in.
1774         chroma_op= chroma_avg;
1778         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1779         mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1780                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1781                    qpix_op, chroma_op);
/**
 * Weighted-prediction motion compensation for one partition. Bidirectional
 * partitions predict both lists into separate buffers and blend them with
 * either implicit (use_weight == 2) or explicit bi-weights; unidirectional
 * partitions predict in place and apply a single explicit weight/offset.
 * NOTE(review): the branch structure (bipred vs single-list, the luma-only
 * weight path) is partly elided from this excerpt.
 */
1785 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1786                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787                            int x_offset, int y_offset,
1788                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1789                            h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1790                            h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1791                            int list0, int list1){
1792     MpegEncContext * const s = &h->s;
// Same destination/offset setup as mc_part_std.
1794     dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1795     dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1796     dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1797     x_offset += 8*s->mb_x;
1798     y_offset += 8*(s->mb_y >> (MB_MBAFF || FIELD_PICTURE));
1801         /* don't optimize for luma-only case, since B-frames usually
1802          * use implicit weights => chroma too. */
// Scratchpad holds the second (list-1) prediction for blending.
1803         uint8_t *tmp_cb = s->obmc_scratchpad;
1804         uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1805         uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1806         int refn0 = h->ref_cache[0][ scan8[n] ];
1807         int refn1 = h->ref_cache[1][ scan8[n] ];
1809         mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1810                     dest_y, dest_cb, dest_cr,
1811                     x_offset, y_offset, qpix_put, chroma_put);
1812         mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1813                     tmp_y, tmp_cb, tmp_cr,
1814                     x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: per-reference-pair weights summing to 64, no offset.
1816         if(h->use_weight == 2){
1817             int weight0 = h->implicit_weight[refn0][refn1];
1818             int weight1 = 64 - weight0;
1819             luma_weight_avg(  dest_y, tmp_y, h->  mb_linesize, 5, weight0, weight1, 0);
1820             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1821             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit bi-weighting: per-list weights and summed offsets from the slice header.
1823             luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1824                             h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1825                             h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1826             chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827                             h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1828                             h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1829             chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830                             h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1831                             h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Unidirectional path: predict in place, then apply the single-list weight.
1834         int list = list1 ? 1 : 0;
1835         int refn = h->ref_cache[list][ scan8[n] ];
1836         Picture *ref= &h->ref_list[list][refn];
1837         mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1838                     dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839                     qpix_put, chroma_put);
1841         luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1842                        h->luma_weight[list][refn], h->luma_offset[list][refn]);
1843         if(h->use_weight_chroma){
1844             chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1845                              h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1846             chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1847                              h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/**
 * Dispatches motion compensation for one partition to either the weighted or
 * the standard path. Weighted MC is needed for explicit weighting
 * (use_weight == 1) and for implicit bipred whose weight pair is not the
 * trivial 32/32 average (which plain "avg" already computes).
 */
1852 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1853                            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1854                            int x_offset, int y_offset,
1855                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1856                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1857                            h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1858                            int list0, int list1){
1859     if((h->use_weight==2 && list0 && list1
1860         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1861        || h->use_weight==1)
1862         mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1863                          x_offset, y_offset, qpix_put, chroma_put,
1864                          weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1866         mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1867                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Prefetches reference pixels around the (approximate) position the next
 * macroblocks will read from, based on the current MB's first MV.
 * NOTE(review): the guard around the body (presumably checking refn validity
 * — confirm against the full source) is elided from this excerpt.
 */
1870 static inline void prefetch_motion(H264Context *h, int list){
1871     /* fetch pixels for estimated mv 4 macroblocks ahead
1872      * optimized for 64byte cache lines */
1873     MpegEncContext * const s = &h->s;
1874     const int refn = h->ref_cache[list][scan8[0]];
// Full-pel position of the MV, plus a look-ahead bias (+8 / +64 below).
1876         const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1877         const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1878         uint8_t **src= h->ref_list[list][refn].data;
1879         int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1880         s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Cb and Cr planes are assumed contiguous: one prefetch with their distance
// as "stride" covers both.
1881         off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1882         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Performs motion compensation for a whole inter macroblock: decodes the MB
 * type into its partitioning (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions)
 * and issues one mc_part call per partition with the matching block-size
 * function tables and direction flags.
 * NOTE(review): the loop header over the four 8x8 blocks and some closing
 * braces are elided from this excerpt.
 */
1886 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1887                       qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1888                       qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1889                       h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1890     MpegEncContext * const s = &h->s;
1891     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1892     const int mb_type= s->current_picture.mb_type[mb_xy];
1894     assert(IS_INTER(mb_type));
// Prefetch list 0 before MC, list 1 after (see end of function).
1896     prefetch_motion(h, 0);
1898     if(IS_16X16(mb_type)){
1899         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1900                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1901                 &weight_op[0], &weight_avg[0],
1902                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1903     }else if(IS_16X8(mb_type)){
// Two 16x8 halves: n=0 top, n=8 bottom (y_offset 4 in 8-pel units).
1904         mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1905                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1906                 &weight_op[1], &weight_avg[1],
1907                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1908         mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1909                 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1910                 &weight_op[1], &weight_avg[1],
1911                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1912     }else if(IS_8X16(mb_type)){
// Two 8x16 halves: delta is a row step (8 lines) for the second luma op.
1913         mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1914                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1915                 &weight_op[2], &weight_avg[2],
1916                 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1917         mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1918                 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1919                 &weight_op[2], &weight_avg[2],
1920                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1924         assert(IS_8X8(mb_type));
// 8x8 path: each of the four 8x8 blocks has its own sub-partitioning.
1927             const int sub_mb_type= h->sub_mb_type[i];
1929             int x_offset= (i&1)<<2;
1930             int y_offset= (i&2)<<1;
1932             if(IS_SUB_8X8(sub_mb_type)){
1933                 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1934                     qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1935                     &weight_op[3], &weight_avg[3],
1936                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937             }else if(IS_SUB_8X4(sub_mb_type)){
1938                 mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1939                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1940                     &weight_op[4], &weight_avg[4],
1941                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1942                 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1943                     qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1944                     &weight_op[4], &weight_avg[4],
1945                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1946             }else if(IS_SUB_4X8(sub_mb_type)){
1947                 mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1948                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1949                     &weight_op[5], &weight_avg[5],
1950                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1951                 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1952                     qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1953                     &weight_op[5], &weight_avg[5],
1954                     IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1957                 assert(IS_SUB_4X4(sub_mb_type));
1959                     int sub_x_offset= x_offset + 2*(j&1);
1960                     int sub_y_offset= y_offset +   (j&2);
1961                     mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1962                         qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1963                         &weight_op[6], &weight_avg[6],
1964                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1970     prefetch_motion(h, 1);
/**
 * Builds all static CAVLC VLC tables (coeff_token, total_zeros, run, and
 * their chroma-DC variants) exactly once; the `done` flag guards repeated
 * calls.
 * NOTE(review): the `if(!done)` wrapper, the loop headers over i, and the
 * closing braces are elided from this excerpt.
 */
1973 static void decode_init_vlc(void){
1974     static int done = 0;
1980         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1981                  &chroma_dc_coeff_token_len [0], 1, 1,
1982                  &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// One coeff_token table per nC range (matches coeff_token_vlc[4] above).
1985             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986                      &coeff_token_len [i][0], 1, 1,
1987                      &coeff_token_bits[i][0], 1, 1, 1);
1991             init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1992                      &chroma_dc_total_zeros_len [i][0], 1, 1,
1993                      &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1995         for(i=0; i<15; i++){
1996             init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1997                      &total_zeros_len [i][0], 1, 1,
1998                      &total_zeros_bits[i][0], 1, 1, 1);
// run_before tables: six small ones plus run7_vlc for run > 6.
2002             init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
2003                      &run_len [i][0], 1, 1,
2004                      &run_bits[i][0], 1, 1, 1);
2006         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2007                  &run_len [6][0], 1, 1,
2008                  &run_bits[6][0], 1, 1, 1);
/* Free all per-context tables allocated by alloc_tables()/context_init()
 * and the parameter-set buffers.  Counterpart of alloc_tables(); safe on
 * partially-allocated state because av_freep() nulls each pointer.
 * NOTE(review): declarations of the loop index and 'hx' are elided here. */
2012 static void free_tables(H264Context *h){
2015     av_freep(&h->intra4x4_pred_mode);
2016     av_freep(&h->chroma_pred_mode_table);
2017     av_freep(&h->cbp_table);
2018     av_freep(&h->mvd_table[0]);
2019     av_freep(&h->mvd_table[1]);
2020     av_freep(&h->direct_table);
2021     av_freep(&h->non_zero_count);
2022     av_freep(&h->slice_table_base);
/* slice_table is an offset pointer into slice_table_base — never freed itself */
2023     h->slice_table= NULL;
2025     av_freep(&h->mb2b_xy);
2026     av_freep(&h->mb2b8_xy);
/* release any stored SPS/PPS parameter sets */
2028     for(i = 0; i < MAX_SPS_COUNT; i++)
2029         av_freep(h->sps_buffers + i);
2031     for(i = 0; i < MAX_PPS_COUNT; i++)
2032         av_freep(h->pps_buffers + i);
/* per-thread buffers allocated by context_init() / frame_start() */
2034     for(i = 0; i < h->s.avctx->thread_count; i++) {
2035         hx = h->thread_context[i];
2037         av_freep(&hx->top_borders[1]);
2038         av_freep(&hx->top_borders[0]);
2039         av_freep(&hx->s.obmc_scratchpad);
2040         av_freep(&hx->s.allocated_edge_emu_buffer);
/* Precompute the 8x8 dequantization tables for all 52 QP values from the
 * PPS scaling matrices.  If both 8x8 scaling matrices are identical, the
 * second table aliases the first instead of being recomputed.  The layout
 * is transposed when a non-C IDCT is in use (see 'transpose').
 * NOTE(review): loop headers over x and some braces are elided here. */
2044 static void init_dequant8_coeff_table(H264Context *h){
2046     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2047     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2048     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2050     for(i=0; i<2; i++ ){
/* identical intra/inter matrices -> share one buffer */
2051         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2052             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2056         for(q=0; q<52; q++){
/* q = 6*shift + idx; dequant scale doubles every 6 QP steps */
2057             int shift = ff_div6[q];
2058             int idx = ff_rem6[q];
2060                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2061                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2062                     h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute the 4x4 dequantization tables (6 matrices: intra/inter for
 * Y, Cb, Cr) for all 52 QP values from the PPS scaling matrices.  Equal
 * scaling matrices share one buffer via the memcmp check.  Layout is
 * transposed for non-C IDCT implementations.
 * NOTE(review): some loop headers and braces are elided in this excerpt. */
2067 static void init_dequant4_coeff_table(H264Context *h){
2069     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2070     for(i=0; i<6; i++ ){
2071         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier buffer when matrix j equals matrix i (j-loop elided) */
2073             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2074                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2081         for(q=0; q<52; q++){
/* +2: 4x4 path carries two extra bits of precision vs the 8x8 path */
2082             int shift = ff_div6[q] + 2;
2083             int idx = ff_rem6[q];
2085                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2086                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2087                     h->pps.scaling_matrix4[i][x]) << shift;
/* Build all dequant tables for the current PPS; with lossless transform
 * bypass, QP==0 entries are forced to the identity scale (1<<6).
 * NOTE(review): loop headers over i/x are elided in this excerpt. */
2092 static void init_dequant_tables(H264Context *h){
2094     init_dequant4_coeff_table(h);
2095     if(h->pps.transform_8x8_mode)
2096         init_dequant8_coeff_table(h);
2097     if(h->sps.transform_bypass){
/* identity dequant (unit scale at the table's fixed-point precision) */
2100                 h->dequant4_coeff[i][0][x] = 1<<6;
2101         if(h->pps.transform_8x8_mode)
2104                 h->dequant8_coeff[i][0][x] = 1<<6;
2111  * needs width/height
2113 static int alloc_tables(H264Context *h){
2114     MpegEncContext * const s = &h->s;
/* +1 row of padding above/below for neighbour accesses at picture edges */
2115     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2120     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2121     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2122     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
/* CABAC-only side tables (mvd, direct, chroma pred mode) */
2124     if( h->pps.cabac ) {
2125         CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2126         CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2127         CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2128         CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table points past the padding row */
2131     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
2132     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* mb index -> motion-vector (b) and 8x8 (b8) index lookup tables */
2134     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2135     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2136     for(y=0; y<s->mb_height; y++){
2137         for(x=0; x<s->mb_width; x++){
2138             const int mb_xy= x + y*s->mb_stride;
2139             const int b_xy = 4*x + 4*y*h->b_stride;
2140             const int b8_xy= 2*x + 2*y*h->b8_stride;
2142             h->mb2b_xy [mb_xy]= b_xy;
2143             h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2147     s->obmc_scratchpad = NULL;
2149     if(!h->dequant4_coeff[0])
2150         init_dequant_tables(h);
2159  * Mimic alloc_tables(), but for every context thread.
2161 static void clone_tables(H264Context *dst, H264Context *src){
/* shared (read-only or disjointly-written) tables: copy pointers only —
 * ownership stays with src; free_tables() frees them once per thread set */
2162     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2163     dst->non_zero_count           = src->non_zero_count;
2164     dst->slice_table              = src->slice_table;
2165     dst->cbp_table                = src->cbp_table;
2166     dst->mb2b_xy                  = src->mb2b_xy;
2167     dst->mb2b8_xy                 = src->mb2b8_xy;
2168     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2169     dst->mvd_table[0]             = src->mvd_table[0];
2170     dst->mvd_table[1]             = src->mvd_table[1];
2171     dst->direct_table             = src->direct_table;
/* per-thread scratch: allocated lazily in frame_start() */
2173     dst->s.obmc_scratchpad = NULL;
2174     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2179  * Allocate buffers which are not shared amongst multiple threads.
2181 static int context_init(H264Context *h){
2182     MpegEncContext * const s = &h->s;
/* one row of Y(16)+Cb(8)+Cr(8) border pixels per MB column, two rows for MBAFF */
2184     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2187     // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
2188     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
2189                    (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
2190     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
/* CHECKED_ALLOCZ jumps to the fail label on OOM */
2193     return -1; // free_tables will clean up for us
/* Shared initialization for the H.264 context: geometry from the AVCodecContext,
 * intra prediction function pointers, and flat (=16) default scaling matrices
 * used until a PPS provides real ones. */
2196 static void common_init(H264Context *h){
2197     MpegEncContext * const s = &h->s;
2199     s->width = s->avctx->width;
2200     s->height = s->avctx->height;
2201     s->codec_id= s->avctx->codec->id;
2203     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = no PPS-derived dequant tables computed yet */
2205     h->dequant_coeff_pps= -1;
2206     s->unrestricted_mv=1;
2207     s->decode=1; //FIXME
/* default "Flat_4x4_16"/"Flat_8x8_16" scaling lists (all 16) */
2209     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2210     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* AVCodec init callback: set up the MpegEncContext defaults, output format,
 * and detect AVC-style (length-prefixed, extradata starting with 1) streams.
 * NOTE(review): several lines (common_init call, AVC extradata handling,
 * return) are elided from this excerpt. */
2213 static int decode_init(AVCodecContext *avctx){
2214     H264Context *h= avctx->priv_data;
2215     MpegEncContext * const s = &h->s;
2217     MPV_decode_defaults(s);
2222     s->out_format = FMT_H264;
2223     s->workaround_bugs= avctx->workaround_bugs;
2226 //    s->decode_mb= ff_h263_decode_mb;
2227     s->quarter_sample = 1;
2229     avctx->pix_fmt= PIX_FMT_YUV420P;
/* extradata beginning with 1 => avcC (MP4-style) bitstream framing */
2233     if(avctx->extradata_size > 0 && avctx->extradata &&
2234        *(char *)avctx->extradata == 1){
/* context thread 0 is the main context itself */
2241     h->thread_context[0] = h;
/* Per-frame setup: start the MPV frame and error resilience, clear the
 * key_frame flag (set later from IDR NALs), precompute the per-block pixel
 * offsets for frame (block_offset[0..23]) and field (block_offset[24..47])
 * coding, and lazily allocate per-thread scratch buffers. */
2245 static int frame_start(H264Context *h){
2246     MpegEncContext * const s = &h->s;
2249     if(MPV_frame_start(s, s->avctx) < 0)
2251     ff_er_frame_start(s);
2253      * MPV_frame_start uses pict_type to derive key_frame.
2254      * This is incorrect for H.264; IDR markings must be used.
2255      * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2256      * See decode_nal_units().
2258     s->current_picture_ptr->key_frame= 0;
2260     assert(s->linesize && s->uvlinesize);
/* luma offsets: scan8 order; [24+i] variants use doubled (field) stride */
2262     for(i=0; i<16; i++){
2263         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma offsets (loop header elided in this excerpt) */
2267         h->block_offset[16+i]=
2268         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269         h->block_offset[24+16+i]=
2270         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273     /* can't be in alloc_tables because linesize isn't known there.
2274      * FIXME: redo bipred weight to not require extra buffer? */
2275     for(i = 0; i < s->avctx->thread_count; i++)
2276         if(!h->thread_context[i]->s.obmc_scratchpad)
2277             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279     /* some macroblocks will be accessed before they're available */
2280     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
/* Save the current macroblock's right column (into left_border) and bottom
 * row (into top_borders[0]) so the deblocking filter of the next MB can read
 * the unfiltered values.  'simple' skips the gray-only (luma only) path.
 * NOTE(review): some lines (src_y adjustment, loop/brace lines) are elided. */
2287 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2288     MpegEncContext * const s = &h->s;
/* step back one line so index 0 is the row above this MB */
2292     src_cb -= uvlinesize;
2293     src_cr -= uvlinesize;
2295     // There are two lines saved, the line above the top macroblock of a pair,
2296     // and the line above the bottom macroblock
/* left_border[0] keeps the corner pixel (above-left of the next row) */
2297     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2298     for(i=1; i<17; i++){
2299         h->left_border[i]= src_y[15+i*  linesize];
/* bottom luma row, copied 8 bytes at a time */
2302     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2303     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2305     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
2307         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
/* chroma right columns (loop header elided) */
2309             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
2310             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2312         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2313         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg=1) or restore (xchg=0) the saved border pixels with the picture
 * so intra prediction sees unfiltered neighbours while deblocking is on.
 * With deblocking_filter==2 the borders are only exchanged across slice
 * boundaries.  NOTE(review): several lines (declarations, XCHG macro body,
 * some braces) are elided from this excerpt. */
2317 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2318     MpegEncContext * const s = &h->s;
/* filter==2: deblock only inside a slice -> compare slice ids of neighbours */
2325     if(h->deblocking_filter == 2) {
2326         mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2327         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2328         deblock_top =  h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2330         deblock_left = (s->mb_x > 0);
2331         deblock_top =  (s->mb_y > 0);
/* point at the above-left corner so offsets line up with the border arrays */
2334     src_y  -=   linesize + 1;
2335     src_cb -= uvlinesize + 1;
2336     src_cr -= uvlinesize + 1;
2338 #define XCHG(a,b,t,xchg)\
/* left luma column (skips row 0 when the top border is not exchanged) */
2345     for(i = !deblock_top; i<17; i++){
2346         XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2351         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2352         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also swap the first 8 pixels of the next MB's top border (top-right samples) */
2353         if(s->mb_x+1 < s->mb_width){
2354             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2358     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2360         for(i = !deblock_top; i<9; i++){
2361             XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
2362             XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2366             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2367             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* MBAFF variant of backup_mb_border(): saves borders for a whole macroblock
 * pair — two bottom rows (top_borders[0] and [1]) and a 32-high left column.
 * NOTE(review): some loop/brace lines are elided from this excerpt. */
2372 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2373     MpegEncContext * const s = &h->s;
/* step back two lines so indices 0/1 are the rows above the pair */
2376     src_y  -= 2 *   linesize;
2377     src_cb -= 2 * uvlinesize;
2378     src_cr -= 2 * uvlinesize;
2380     // There are two lines saved, the line above the top macroblock of a pair,
2381     // and the line above the bottom macroblock
2382     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2383     h->left_border[1]= h->top_borders[1][s->mb_x][15];
2384     for(i=2; i<34; i++){
2385         h->left_border[i]= src_y[15+i*  linesize];
/* last two luma rows of the pair */
2388     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
2389     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2390     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
2391     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2393     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2394         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
2395         h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2396         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
2397         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2398         for(i=2; i<18; i++){
2399             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
2400             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2402         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2403         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2404         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2405         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/* MBAFF variant of xchg_mb_border(): swap/restore the saved pair borders
 * (both top_borders rows and the 32-high left column) around deblocking.
 * NOTE(review): declarations, the XCHG macro body and some braces are
 * elided from this excerpt. */
2409 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2410     MpegEncContext * const s = &h->s;
2413     int deblock_left = (s->mb_x > 0);
/* > 1 because the row above belongs to the previous MB *pair* */
2414     int deblock_top = (s->mb_y > 1);
2416     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2418     src_y  -= 2 *   linesize + 1;
2419     src_cb -= 2 * uvlinesize + 1;
2420     src_cr -= 2 * uvlinesize + 1;
2422 #define XCHG(a,b,t,xchg)\
/* skip the two top rows when the top border is not exchanged */
2429     for(i = (!deblock_top)<<1; i<34; i++){
2430         XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2435         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2436         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2437         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2438         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* top-right samples from the next MB pair's border */
2439         if(s->mb_x+1 < s->mb_width){
2440             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2441             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2445     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2447         for(i = (!deblock_top) << 1; i<18; i++){
2448             XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
2449             XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2453             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2454             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2455             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2456             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/* Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT+add for luma and chroma, and in-loop deblocking (with the
 * border save/exchange dance for MBAFF).  'simple' is a compile-time flag:
 * when 1 the MBAFF/PCM/gray/SVQ3 paths are dead code, letting the compiler
 * emit a fast specialized version (see hl_decode_mb_simple/_complex).
 * NOTE(review): a number of interior lines (else branches, loop headers,
 * closing braces) are elided from this excerpt; comments below describe
 * only what is visible. */
2461 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2462     MpegEncContext * const s = &h->s;
2463     const int mb_x= s->mb_x;
2464     const int mb_y= s->mb_y;
2465     const int mb_xy= mb_x + mb_y*s->mb_stride;
2466     const int mb_type= s->current_picture.mb_type[mb_xy];
2467     uint8_t  *dest_y, *dest_cb, *dest_cr;
2468     int linesize, uvlinesize /*dct_offset*/;
2470     int *block_offset = &h->block_offset[0];
2471     const unsigned int bottom = mb_y & 1;
/* transform bypass (lossless) only at qscale 0; is_h264 folds away SVQ3 checks */
2472     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2473     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2474     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2476     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2477     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2478     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2480     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2481     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides, use the field block_offset table, and
 * for the bottom MB of a pair rewind dest to interleave the field lines */
2483     if (!simple && MB_FIELD) {
2484         linesize   = h->mb_linesize   = s->linesize * 2;
2485         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2486         block_offset = &h->block_offset[24];
2487         if(mb_y&1){ //FIXME move out of this func?
2488             dest_y -= s->linesize*15;
2489             dest_cb-= s->uvlinesize*7;
2490             dest_cr-= s->uvlinesize*7;
/* re-map ref_cache indices to per-field reference numbering */
2494         for(list=0; list<h->list_count; list++){
2495             if(!USES_LIST(mb_type, list))
2497             if(IS_16X16(mb_type)){
2498                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2499                 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
2501                 for(i=0; i<16; i+=4){
2502                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2503                     int ref = h->ref_cache[list][scan8[i]];
2505                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
/* frame macroblock: normal strides */
2511         linesize   = h->mb_linesize   = s->linesize;
2512         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2513 //        dct_offset = s->linesize * 16;
/* pick the IDCT-add functions matching the transform size / bypass mode */
2516     if(transform_bypass){
2518         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2519     }else if(IS_8x8DCT(mb_type)){
2520         idct_dc_add = s->dsp.h264_idct8_dc_add;
2521         idct_add = s->dsp.h264_idct8_add;
2523         idct_dc_add = s->dsp.h264_idct_dc_add;
2524         idct_add = s->dsp.h264_idct_add;
/* MBAFF + deblocking + intra: swap in unfiltered borders of the pair */
2527     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2528        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2529         int mbt_y = mb_y&~1;
2530         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
2531         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2532         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2533         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were parsed into h->mb; copy them straight out */
2536     if (!simple && IS_INTRA_PCM(mb_type)) {
2539         // The pixels are stored in h->mb array in the same order as levels,
2540         // copy them in output in the correct order.
2541         for(i=0; i<16; i++) {
2542             for (y=0; y<4; y++) {
2543                 for (x=0; x<4; x++) {
2544                     *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2548         for(i=16; i<16+4; i++) {
2549             for (y=0; y<4; y++) {
2550                 for (x=0; x<4; x++) {
2551                     *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2555         for(i=20; i<20+4; i++) {
2556             for (y=0; y<4; y++) {
2557                 for (x=0; x<4; x++) {
2558                     *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
/* intra macroblock: exchange borders, predict chroma, then luma */
2563         if(IS_INTRA(mb_type)){
2564             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2565                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2567             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2568                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2569                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
/* intra 4x4 / 8x8 luma: predict each sub-block then add its residual */
2572             if(IS_INTRA4x4(mb_type)){
2573                 if(simple || !s->encoding){
2574                     if(IS_8x8DCT(mb_type)){
2575                         for(i=0; i<16; i+=4){
2576                             uint8_t * const ptr= dest_y + block_offset[i];
2577                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2578                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2579                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2580                                                    (h->topright_samples_available<<i)&0x4000, linesize);
/* DC-only block -> cheaper dc_add path */
2582                                 if(nnz == 1 && h->mb[i*16])
2583                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2585                                     idct_add(ptr, h->mb + i*16, linesize);
2589                         for(i=0; i<16; i++){
2590                             uint8_t * const ptr= dest_y + block_offset[i];
2592                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* modes needing top-right samples: synthesize them when unavailable by
 * replicating the last available top pixel */
2595                             if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2596                                 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2597                                 assert(mb_y || linesize <= block_offset[i]);
2598                                 if(!topright_avail){
2599                                     tr= ptr[3 - linesize]*0x01010101;
2600                                     topright= (uint8_t*) &tr;
2602                                     topright= ptr + 4 - linesize;
2606                             h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2607                             nnz = h->non_zero_count_cache[ scan8[i] ];
2610                                     if(nnz == 1 && h->mb[i*16])
2611                                         idct_dc_add(ptr, h->mb + i*16, linesize);
2613                                         idct_add(ptr, h->mb + i*16, linesize);
/* SVQ3 fallback residual add (non-H.264 path) */
2615                                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* intra 16x16: full-MB luma prediction + separate DC transform */
2620                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2622                     if(!transform_bypass)
2623                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2625                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2627             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2628                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation (weighted/bi-weighted variants) */
2630             hl_motion(h, dest_y, dest_cb, dest_cr,
2631                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2632                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2633                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals for non-intra4x4 MBs */
2637         if(!IS_INTRA4x4(mb_type)){
2639                 if(IS_INTRA16x16(mb_type)){
2640                     for(i=0; i<16; i++){
2641                         if(h->non_zero_count_cache[ scan8[i] ])
2642                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2643                         else if(h->mb[i*16])
2644                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* 4x4 or 8x8 transform stride through the 16 sub-blocks */
2647                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2648                     for(i=0; i<16; i+=di){
2649                         int nnz = h->non_zero_count_cache[ scan8[i] ];
2651                             if(nnz==1 && h->mb[i*16])
2652                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2654                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2659                 for(i=0; i<16; i++){
2660                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2661                         uint8_t * const ptr= dest_y + block_offset[i];
2662                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals: dequant the 2x2 DC, then add per-4x4-block */
2668         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2669             uint8_t *dest[2] = {dest_cb, dest_cr};
2670             if(transform_bypass){
2671                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2673                 idct_add = s->dsp.h264_idct_add;
2674                 idct_dc_add = s->dsp.h264_idct_dc_add;
2675                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2676                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2679                 for(i=16; i<16+8; i++){
2680                     if(h->non_zero_count_cache[ scan8[i] ])
2681                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2682                     else if(h->mb[i*16])
2683                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2686                 for(i=16; i<16+8; i++){
2687                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2688                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2689                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: MBAFF filters a finished MB pair at once; otherwise filter
 * this MB after backing up its borders */
2695     if(h->deblocking_filter) {
2696         if (!simple && FRAME_MBAFF) {
2697             //FIXME try deblocking one mb at a time?
2698             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2699             const int mb_y = s->mb_y - 1;
2700             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2701             const int mb_xy= mb_x + mb_y*s->mb_stride;
2702             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
2703             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* wait until both MBs of the pair are reconstructed */
2704             if (!bottom) return;
2705             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2706             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2707             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2709             if(IS_INTRA(mb_type_top | mb_type_bottom))
2710                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2712             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2716             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2717             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2718             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2719             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2720             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2723             tprintf(h->s.avctx, "call mbaff filter_mb\n");
2724             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2725             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2726             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2727             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2729             tprintf(h->s.avctx, "call filter_mb\n");
2730             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2731             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2732             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2738  * Process a macroblock; this case avoids checks for expensive uncommon cases.
/* simple=1 specialization: the av_always_inline body folds away the
 * MBAFF / PCM / gray / SVQ3 branches at compile time. */
2740 static void hl_decode_mb_simple(H264Context *h){
2741     hl_decode_mb_internal(h, 1);
2745  * Process a macroblock; this handles edge cases, such as interlacing.
/* simple=0 specialization; av_noinline keeps the slow path out of the
 * hot simple path's code size. */
2747 static void av_noinline hl_decode_mb_complex(H264Context *h){
2748     hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the fast (simple) or full (complex)
 * variant depending on whether any uncommon feature is in play.
 * NOTE(review): the line(s) between the is_complex computation and the
 * call to hl_decode_mb_complex() are elided from this excerpt. */
2751 static void hl_decode_mb(H264Context *h){
2752     MpegEncContext * const s = &h->s;
2753     const int mb_x= s->mb_x;
2754     const int mb_y= s->mb_y;
2755     const int mb_xy= mb_x + mb_y*s->mb_stride;
2756     const int mb_type= s->current_picture.mb_type[mb_xy];
/* anything interlaced, PCM, non-H.264, gray-only or encoding is "complex" */
2757     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2763         hl_decode_mb_complex(h);
2764     else hl_decode_mb_simple(h);
/* Re-point a Picture's plane pointers/strides so it addresses a single
 * field of the frame: double each linesize, and (for the bottom field,
 * guard elided in this excerpt) advance data by one line first. */
2767 static void pic_as_field(Picture *pic, const int bottom){
2769     for (i = 0; i < 4; ++i) {
2771             pic->data[i] += pic->linesize[i];
2772         pic->linesize[i] *= 2;
/* Copy src into dest as a single field of the requested parity, if src has
 * a reference field of that parity.  Returns 1 on output, 0 otherwise.
 * id_add is added to pic_id (used to distinguish opposite-parity entries).
 * NOTE(review): the actual struct copy and return are elided here. */
2776 static int split_field_copy(Picture *dest, Picture *src,
2777                             int parity, int id_add){
/* does src provide a reference field with this parity? */
2778     int match = !!(src->reference & parity);
2782             pic_as_field(dest, parity == PICT_BOTTOM_FIELD);
2784             dest->pic_id += id_add;
2791 * Split one reference list into field parts, interleaving by parity
2792 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2793 * set to look at the actual start of data for that field.
2795 * @param dest output list
2796 * @param dest_len maximum number of fields to put in dest
2797 * @param src the source reference list containing fields and/or field pairs
2798 * (aka short_ref/long_ref, or
2799 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2800 * @param src_len number of Picture's in source (pairs and unmatched fields)
2801 * @param parity the parity of the picture being decoded/needing
2802 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2803 * @return number of fields placed in dest
2805 static int split_field_half_ref_list(Picture *dest, int dest_len,
2806                                      Picture *src, int src_len, int parity){
/* alternate between same-parity and opposite-parity source cursors,
 * preferring same parity, per H.264 8.2.4.2.5.
 * NOTE(review): same_i/opp_i declarations and the function tail are elided. */
2807     int same_parity   = 1;
2813     for (out_i = 0; out_i < dest_len; out_i += field_output) {
2814         if (same_parity && same_i < src_len) {
2815             field_output = split_field_copy(dest + out_i, src + same_i,
/* after a successful same-parity output, switch to the opposite parity */
2817             same_parity   = !field_output;
2820         } else if (opp_i < src_len) {
2821             field_output = split_field_copy(dest + out_i, src + opp_i,
2822                                             PICT_FRAME - parity, 0);
2823             same_parity   = field_output;
2835 * Split the reference frame list into a reference field list.
2836 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2837 * The input list contains both reference field pairs and
2838 * unmatched reference fields; it is ordered as spec describes
2839 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2840 * unmatched field pairs are also present. Conceptually this is equivalent
2841 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2843 * @param dest output reference list where ordered fields are to be placed
2844 * @param dest_len max number of fields to place at dest
2845 * @param src source reference list, as described above
2846 * @param src_len number of pictures (pairs and unmatched fields) in src
2847 * @param parity parity of field being currently decoded
2848 * (one of PICT_{TOP,BOTTOM}_FIELD)
2849 * @param long_i index into src array that holds first long reference picture,
2850 * or src_len if no long refs present.
2852 static int split_field_ref_list(Picture *dest, int dest_len,
2853                                 Picture *src,  int src_len,
2854                                 int parity,    int long_i){
/* short-term part first [0, long_i), then long-term part [long_i, src_len) */
2856     int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2860     i += split_field_half_ref_list(dest, dest_len, src + long_i,
2861                                    src_len - long_i, parity);
2866 * fills the default_ref_list.
/* Build the default reference picture lists (RefPicListX) per H.264
 * 8.2.4.2: POC-sorted L0/L1 for B slices, frame_num-ordered list for P
 * slices, followed by long-term refs; for field pictures the frame lists
 * are split into field lists via split_field_ref_list().
 * NOTE(review): many interior lines (declarations, else branches, closing
 * braces, the non-B path header) are elided from this excerpt. */
2868 static int fill_default_ref_list(H264Context *h){
2869     MpegEncContext * const s = &h->s;
2871     int smallest_poc_greater_than_current = -1;
2873     Picture sorted_short_ref[32];
2874     Picture field_entry_list[2][32];
2875     Picture *frame_list[2];
/* field decoding: build frame lists in scratch, split into fields later */
2877     if (FIELD_PICTURE) {
2878         structure_sel = PICT_FRAME;
2879         frame_list[0] = field_entry_list[0];
2880         frame_list[1] = field_entry_list[1];
2883         frame_list[0] = h->default_ref_list[0];
2884         frame_list[1] = h->default_ref_list[1];
2887     if(h->slice_type==B_TYPE){
2894         /* sort frame according to poc in B slice */
/* selection sort of short refs by ascending POC */
2895         for(out_i=0; out_i<h->short_ref_count; out_i++){
2897             int best_poc=INT_MAX;
2899             for(i=0; i<h->short_ref_count; i++){
2900                 const int poc= h->short_ref[i]->poc;
2901                 if(poc > limit && poc < best_poc){
2907             assert(best_i != INT_MIN);
2910             sorted_short_ref[out_i]= *h->short_ref[best_i];
2911             tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
/* remember where refs with POC >= current start: the L0/L1 pivot */
2912             if (-1 == smallest_poc_greater_than_current) {
2913                 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2914                     smallest_poc_greater_than_current = out_i;
2919         tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2921         // find the largest poc
/* walk outward from the pivot: L0 descending below it, L1 ascending above */
2922         for(list=0; list<2; list++){
2925             int step= list ? -1 : 1;
2927             for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2929                 while(j<0 || j>= h->short_ref_count){
2930                     if(j != -99 && step == (list ? -1 : 1))
2933                     j= smallest_poc_greater_than_current + (step>>1);
/* skip refs that don't cover the required picture structure */
2935                 sel = sorted_short_ref[j].reference | structure_sel;
2936                 if(sel != PICT_FRAME) continue;
2937                 frame_list[list][index  ]= sorted_short_ref[j];
2938                 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2940             short_len[list] = index;
/* append long-term refs, pic_id = long-term index */
2942             for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2944                 if(h->long_ref[i] == NULL) continue;
2945                 sel = h->long_ref[i]->reference | structure_sel;
2946                 if(sel != PICT_FRAME) continue;
2948                 frame_list[ list ][index  ]= *h->long_ref[i];
2949                 frame_list[ list ][index++].pic_id= i;;
2953         if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
2954             // swap the two first elements of L1 when
2955             // L0 and L1 are identical
2956             Picture temp= frame_list[1][0];
2957             frame_list[1][0] = frame_list[1][1];
2958             frame_list[1][1] = temp;
/* field pictures: split each frame list into an interleaved field list */
2963         for(list=0; list<2; list++){
2965                 len[list] = split_field_ref_list(h->default_ref_list[list],
2969                                                  s->picture_structure,
2972             if(len[list] < h->ref_count[ list ])
2973                 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
/* non-B (P/SP) path: short refs in stored order, then long refs */
2980         for(i=0; i<h->short_ref_count; i++){
2982             sel = h->short_ref[i]->reference | structure_sel;
2983             if(sel != PICT_FRAME) continue;
2984             frame_list[0][index  ]= *h->short_ref[i];
2985             frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2988         for(i = 0; i < 16; i++){
2990             if(h->long_ref[i] == NULL) continue;
2991             sel = h->long_ref[i]->reference | structure_sel;
2992             if(sel != PICT_FRAME) continue;
2993             frame_list[0][index  ]= *h->long_ref[i];
2994             frame_list[0][index++].pic_id= i;;
2998             index = split_field_ref_list(h->default_ref_list[0],
2999                                          h->ref_count[0], frame_list[0],
3000                                          index, s->picture_structure,
3003         if(index < h->ref_count[0])
3004             memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3007     for (i=0; i<h->ref_count[0]; i++) {
3008         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3010     if(h->slice_type==B_TYPE){
3011         for (i=0; i<h->ref_count[1]; i++) {
3012             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
3019 static void print_short_term(H264Context *h);
3020 static void print_long_term(H264Context *h);
3023 * Extract structure information about the picture described by pic_num in
3024 * the current decoding context (frame or field). Note that pic_num is
3025 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
3026 * @param pic_num picture number for which to extract structure information
3027 * @param structure one of PICT_XXX describing structure of picture
3029 * @return frame number (short term) or long term index of picture
3030 * described by pic_num
3032 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3033 MpegEncContext * const s = &h->s;
/* default: assume the addressed picture has the structure of the current picture */
3035 *structure = s->picture_structure;
/* NOTE(review): the guarding condition for the XOR below (field coding and an
 * even pic_num addressing the field opposite the current one) and the final
 * return are not visible in this extract — confirm against the full source. */
3038 /* opposite field */
3039 *structure ^= PICT_FRAME;
/**
 * Parses the ref_pic_list_reordering() syntax from the slice header and
 * applies the requested reordering to h->ref_list[], starting from the
 * default lists built earlier. Returns 0 on success, negative on error
 * (error paths are partly outside this extract).
 */
3046 static int decode_ref_pic_list_reordering(H264Context *h){
3047 MpegEncContext * const s = &h->s;
3048 int list, index, pic_structure;
3050 print_short_term(h);
/* intra slices carry no reference lists, nothing to reorder */
3052 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
3054 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering commands then move entries to the front */
3055 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3057 if(get_bits1(&s->gb)){
/* pic-num prediction starts at the current picture number */
3058 int pred= h->curr_pic_num;
3060 for(index=0; ; index++){
3061 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3062 unsigned int pic_id;
3064 Picture *ref = NULL;
/* idc 3 terminates the reordering command list */
3066 if(reordering_of_pic_nums_idc==3)
3069 if(index >= h->ref_count[list]){
3070 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3074 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short term picture addressed by a signed pic-num difference */
3075 if(reordering_of_pic_nums_idc<2){
3076 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3079 if(abs_diff_pic_num >= h->max_pic_num){
3080 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; result wraps modulo max_pic_num */
3084 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3085 else pred+= abs_diff_pic_num;
3086 pred &= h->max_pic_num - 1;
3088 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search the short term list back-to-front for a matching frame_num */
3090 for(i= h->short_ref_count-1; i>=0; i--){
3091 ref = h->short_ref[i];
3092 assert(ref->reference);
3093 assert(!ref->long_ref);
3094 if(ref->data[0] != NULL &&
3095 ref->frame_num == frame_num &&
3096 (ref->reference & pic_structure) &&
3097 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
/* idc 2: long term picture addressed by long_term_pic_idx */
3104 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3106 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3109 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3112 ref = h->long_ref[long_idx];
3113 assert(!(ref && !ref->reference));
3114 if(ref && (ref->reference & pic_structure)){
3115 ref->pic_id= pic_id;
3116 assert(ref->long_ref);
3124 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3125 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* found: locate any duplicate of ref later in the list, then shift the
 * intervening entries down and insert ref at the current index */
3127 for(i=index; i+1<h->ref_count[list]; i++){
3128 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3131 for(; i > index; i--){
3132 h->ref_list[list][i]= h->ref_list[list][i-1];
3134 h->ref_list[list][index]= *ref;
/* field reference: convert the inserted frame entry into the addressed field */
3136 int bot = pic_structure == PICT_BOTTOM_FIELD;
3137 pic_as_field(&h->ref_list[list][index], bot);
3141 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* replace any still-empty entries with the current picture so that every
 * list slot points at decodable data (error concealment) */
3147 for(list=0; list<h->list_count; list++){
3148 for(index= 0; index < h->ref_count[list]; index++){
3149 if(!h->ref_list[list][index].data[0])
3150 h->ref_list[list][index]= s->current_picture;
/* temporal direct mode needs the distance scale factors */
3154 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
3155 direct_dist_scale_factor(h);
3156 direct_ref_list_init(h);
/**
 * For MBAFF frames, builds per-field reference entries at ref_list
 * indices 16+2*i (top field) and 16+2*i+1 (bottom field) from the frame
 * entries at index i, and duplicates the corresponding prediction
 * weights/offsets for those field indices.
 */
3160 static void fill_mbaff_ref_list(H264Context *h){
3162 for(list=0; list<2; list++){ //FIXME try list_count
3163 for(i=0; i<h->ref_count[list]; i++){
3164 Picture *frame = &h->ref_list[list][i];
3165 Picture *field = &h->ref_list[list][16+2*i];
/* fields use double the frame line stride */
3168 field[0].linesize[j] <<= 1;
/* bottom field: same as top but data pointers advanced by one frame line */
3169 field[1] = field[0];
3171 field[1].data[j] += frame->linesize[j];
/* both fields inherit the frame's explicit luma/chroma weights */
3173 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3174 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3176 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3177 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed by both lists; replicate rows and columns */
3181 for(j=0; j<h->ref_count[1]; j++){
3182 for(i=0; i<h->ref_count[0]; i++)
3183 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3184 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3185 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parses the pred_weight_table() slice header syntax (explicit weighted
 * prediction): per-list, per-reference luma and chroma weights/offsets.
 * Sets h->use_weight / h->use_weight_chroma when any weight differs from
 * the default (1 << log2_denom with zero offset).
 */
3189 static int pred_weight_table(H264Context *h){
3190 MpegEncContext * const s = &h->s;
3192 int luma_def, chroma_def;
3195 h->use_weight_chroma= 0;
3196 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3197 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight is the identity: 1 << denom, offset 0 */
3198 luma_def = 1<<h->luma_log2_weight_denom;
3199 chroma_def = 1<<h->chroma_log2_weight_denom;
3201 for(list=0; list<2; list++){
3202 for(i=0; i<h->ref_count[list]; i++){
3203 int luma_weight_flag, chroma_weight_flag;
3205 luma_weight_flag= get_bits1(&s->gb);
3206 if(luma_weight_flag){
3207 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3208 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* any non-identity weight enables weighted prediction */
3209 if( h->luma_weight[list][i] != luma_def
3210 || h->luma_offset[list][i] != 0)
/* flag absent: fall back to the identity weight */
3213 h->luma_weight[list][i]= luma_def;
3214 h->luma_offset[list][i]= 0;
3217 chroma_weight_flag= get_bits1(&s->gb);
3218 if(chroma_weight_flag){
/* one weight/offset pair per chroma plane (Cb, Cr) */
3221 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3222 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3223 if( h->chroma_weight[list][i][j] != chroma_def
3224 || h->chroma_offset[list][i][j] != 0)
3225 h->use_weight_chroma= 1;
3230 h->chroma_weight[list][i][j]= chroma_def;
3231 h->chroma_offset[list][i][j]= 0;
/* only B slices carry a second (list 1) weight table */
3235 if(h->slice_type != B_TYPE) break;
3237 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derives the implicit bi-prediction weights (weighted_bipred_idc == 2)
 * from the POC distances between the current picture and each (list0,
 * list1) reference pair, per H.264 8.4.2.3. Falls back to equal 32/32
 * weighting when the distances are degenerate.
 */
3241 static void implicit_weight_table(H264Context *h){
3242 MpegEncContext * const s = &h->s;
3244 int cur_poc = s->current_picture_ptr->poc;
/* single symmetric reference pair: implicit weighting degenerates to
 * the unweighted average, so disable it entirely */
3246 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3247 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3249 h->use_weight_chroma= 0;
/* use_weight==2 selects the implicit weighting path in the MC code */
3254 h->use_weight_chroma= 2;
3255 h->luma_log2_weight_denom= 5;
3256 h->chroma_log2_weight_denom= 5;
3258 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3259 int poc0 = h->ref_list[0][ref0].poc;
3260 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3261 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances; tx approximates 16384/td (spec 8-250) */
3262 int td = av_clip(poc1 - poc0, -128, 127);
3264 int tb = av_clip(cur_poc - poc0, -128, 127);
3265 int tx = (16384 + (FFABS(td) >> 1)) / td;
/* DistScaleFactor >> 2; weights are w1 = dsf, w0 = 64 - dsf */
3266 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors force the default 32/32 weights */
3267 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3268 h->implicit_weight[ref0][ref1] = 32;
3270 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3272 h->implicit_weight[ref0][ref1] = 32;
3278 * Mark a picture as no longer needed for reference. The refmask
3279 * argument allows unreferencing of individual fields or the whole frame.
3280 * If the picture becomes entirely unreferenced, but is being held for
3281 * display purposes, it is marked as such.
3282 * @param refmask mask of fields to unreference; the mask is bitwise
3283 * anded with the reference marking of pic
3284 * @return non-zero if pic becomes entirely unreferenced (except possibly
3285 * for display purposes) zero if one of the fields remains in
3288 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* at least one field of the pair is still marked as a reference */
3290 if (pic->reference &= refmask) {
/* fully unreferenced: if the picture is still queued for display,
 * tag it so the buffer is not recycled before output */
3293 if(pic == h->delayed_output_pic)
3294 pic->reference=DELAYED_PIC_REF;
3296 for(i = 0; h->delayed_pic[i]; i++)
3297 if(pic == h->delayed_pic[i]){
3298 pic->reference=DELAYED_PIC_REF;
3307 * instantaneous decoder refresh.
3309 static void idr(H264Context *h){
3312 for(i=0; i<16; i++){
3313 if (h->long_ref[i] != NULL) {
3314 unreference_pic(h, h->long_ref[i], 0);
3315 h->long_ref[i]= NULL;
3318 h->long_ref_count=0;
3320 for(i=0; i<h->short_ref_count; i++){
3321 unreference_pic(h, h->short_ref[i], 0);
3322 h->short_ref[i]= NULL;
3324 h->short_ref_count=0;
3327 /* forget old pics after a seek */
3328 static void flush_dpb(AVCodecContext *avctx){
3329 H264Context *h= avctx->priv_data;
/* drop every picture queued for delayed output */
3331 for(i=0; i<16; i++) {
3332 if(h->delayed_pic[i])
3333 h->delayed_pic[i]->reference= 0;
3334 h->delayed_pic[i]= NULL;
3336 if(h->delayed_output_pic)
3337 h->delayed_output_pic->reference= 0;
3338 h->delayed_output_pic= NULL;
/* NOTE(review): lines between here and the next statement are missing
 * from this extract (presumably the reference-list reset); verify
 * against the full source. */
3340 if(h->s.current_picture_ptr)
3341 h->s.current_picture_ptr->reference= 0;
/* a seek invalidates any pending first field of a field pair */
3342 h->s.first_field= 0;
3346 * Find a Picture in the short term reference list by frame number.
3347 * @param frame_num frame number to search for
3348 * @param idx the index into h->short_ref where returned picture is found
3349 * undefined if no picture found.
3350 * @return pointer to the found picture, or NULL if no pic with the provided
3351 * frame number is found
3353 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3354 MpegEncContext * const s = &h->s;
/* linear scan; the short term list is small (at most 16 entries) */
3357 for(i=0; i<h->short_ref_count; i++){
3358 Picture *pic= h->short_ref[i];
3359 if(s->avctx->debug&FF_DEBUG_MMCO)
3360 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3361 if(pic->frame_num == frame_num) {
3370 * Remove a picture from the short term reference list by its index in
3371 * that list. This does no checking on the provided index; it is assumed
3372 * to be valid. Other list entries are shifted down.
3373 * @param i index into h->short_ref of picture to remove.
3375 static void remove_short_at_index(H264Context *h, int i){
3376 assert(i > 0 && i < h->short_ref_count);
3377 h->short_ref[i]= NULL;
3378 if (--h->short_ref_count)
3379 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/**
 * Remove a short term reference picture by frame number.
 */
3384 * @return the removed picture or NULL if an error occurs
3386 static Picture * remove_short(H264Context *h, int frame_num){
3387 MpegEncContext * const s = &h->s;
3391 if(s->avctx->debug&FF_DEBUG_MMCO)
3392 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
/* locate the picture, then drop it from the list (index from find_short) */
3394 pic = find_short(h, frame_num, &i);
3396 remove_short_at_index(h, i);
3402 * Remove a picture from the long term reference list by its index in
3403 * that list. This does no checking on the provided index; it is assumed
3404 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3405 * @param i index into h->long_ref of picture to remove.
3407 static void remove_long_at_index(H264Context *h, int i){
3408 h->long_ref[i]= NULL;
3409 h->long_ref_count--;
/**
 * Remove a long term reference picture by its long term index.
 */
3414 * @return the removed picture or NULL if an error occurs
3416 static Picture * remove_long(H264Context *h, int i){
/* fetch the slot first so the caller gets the removed picture back */
3419 pic= h->long_ref[i];
3421 remove_long_at_index(h, i);
3427 * print short term list
/* debug helper: dumps the short term reference list when FF_DEBUG_MMCO is set */
3429 static void print_short_term(H264Context *h) {
3431 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3432 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3433 for(i=0; i<h->short_ref_count; i++){
3434 Picture *pic= h->short_ref[i];
3435 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3441 * print long term list
/* debug helper: dumps the (sparse, 16-slot) long term reference list
 * when FF_DEBUG_MMCO is set */
3443 static void print_long_term(H264Context *h) {
3445 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3446 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3447 for(i = 0; i < 16; i++){
3448 Picture *pic= h->long_ref[i];
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3457 * Executes the reference picture marking (memory management control operations).
3459 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3460 MpegEncContext * const s = &h->s;
/* set once the current picture has been placed in a reference list */
3462 int current_ref_assigned=0;
3465 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3466 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
/* apply each parsed MMCO command in bitstream order */
3468 for(i=0; i<mmco_count; i++){
3469 int structure, frame_num, unref_pic;
3470 if(s->avctx->debug&FF_DEBUG_MMCO)
3471 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3473 switch(mmco[i].opcode){
/* mark a short term picture (or one of its fields) unused for reference */
3474 case MMCO_SHORT2UNUSED:
3475 if(s->avctx->debug&FF_DEBUG_MMCO)
3476 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3477 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3478 pic = find_short(h, frame_num, &j);
/* structure ^ PICT_FRAME keeps only the field NOT being unreferenced */
3480 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3481 remove_short_at_index(h, j);
3482 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3483 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
/* move a short term picture to a long term index */
3485 case MMCO_SHORT2LONG:
3486 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3487 h->long_ref[mmco[i].long_arg]->frame_num ==
3488 mmco[i].short_pic_num / 2) {
3489 /* do nothing, we've already moved this field pair. */
/* in field coding pic nums address fields, so halve to get frame_num */
3491 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
/* evict whatever currently occupies the target long term slot */
3493 pic= remove_long(h, mmco[i].long_arg);
3494 if(pic) unreference_pic(h, pic, 0);
3496 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3497 if (h->long_ref[ mmco[i].long_arg ]){
3498 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3499 h->long_ref_count++;
/* mark a long term picture (or one of its fields) unused for reference */
3503 case MMCO_LONG2UNUSED:
3504 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3505 pic = h->long_ref[j];
3507 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3508 remove_long_at_index(h, j);
3509 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3510 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
/* MMCO_LONG: assign the current picture to a long term index */
3514 if (FIELD_PICTURE && !s->first_field) {
3515 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3516 /* Just mark second field as referenced */
3518 } else if (s->current_picture_ptr->reference) {
3519 /* First field in pair is in short term list or
3520 * at a different long term index.
3521 * This is not allowed; see 7.4.3, notes 2 and 3.
3522 * Report the problem and keep the pair where it is,
3523 * and mark this field valid.
3525 av_log(h->s.avctx, AV_LOG_ERROR,
3526 "illegal long term reference assignment for second "
3527 "field in complementary field pair (first field is "
3528 "short term or has non-matching long index)\n");
/* evict the previous occupant of the slot, then install the current pic */
3534 pic= remove_long(h, mmco[i].long_arg);
3535 if(pic) unreference_pic(h, pic, 0);
3537 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3538 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3539 h->long_ref_count++;
3542 s->current_picture_ptr->reference |= s->picture_structure;
3543 current_ref_assigned=1;
/* shrink the allowed long term index range */
3545 case MMCO_SET_MAX_LONG:
3546 assert(mmco[i].long_arg <= 16);
3547 // just remove the long term which index is greater than new max
3548 for(j = mmco[i].long_arg; j<16; j++){
3549 pic = remove_long(h, j);
3550 if (pic) unreference_pic(h, pic, 0);
/* MMCO_RESET: drop every reference, short and long term */
3554 while(h->short_ref_count){
3555 pic= remove_short(h, h->short_ref[0]->frame_num);
3556 if(pic) unreference_pic(h, pic, 0);
3558 for(j = 0; j < 16; j++) {
3559 pic= remove_long(h, j);
3560 if(pic) unreference_pic(h, pic, 0);
/* after the MMCO commands: place the current picture in the lists */
3567 if (!current_ref_assigned && FIELD_PICTURE &&
3568 !s->first_field && s->current_picture_ptr->reference) {
3570 /* Second field of complementary field pair; the first field of
3571 * which is already referenced. If short referenced, it
3572 * should be first entry in short_ref. If not, it must exist
3573 * in long_ref; trying to put it on the short list here is an
3574 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3576 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3577 /* Just mark the second field valid */
3578 s->current_picture_ptr->reference = PICT_FRAME;
3579 } else if (s->current_picture_ptr->long_ref) {
3580 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3581 "assignment for second field "
3582 "in complementary field pair "
3583 "(first field is long term)\n");
3586 * First field in reference, but not in any sensible place on our
3587 * reference lists. This shouldn't happen unless reference
3588 * handling somewhere else is wrong.
3592 current_ref_assigned = 1;
/* default: prepend the current picture to the short term list */
3595 if(!current_ref_assigned){
/* a picture with the same frame_num must not already be there */
3596 pic= remove_short(h, s->current_picture_ptr->frame_num);
3598 unreference_pic(h, pic, 0);
3599 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3602 if(h->short_ref_count)
3603 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3605 h->short_ref[0]= s->current_picture_ptr;
3606 h->short_ref[0]->long_ref=0;
3607 h->short_ref_count++;
3608 s->current_picture_ptr->reference |= s->picture_structure;
3611 print_short_term(h);
/**
 * Parses the dec_ref_pic_marking() slice header syntax into h->mmco[],
 * or synthesizes a sliding-window MMCO when no adaptive marking is
 * present and the reference buffer is full.
 */
3616 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3617 MpegEncContext * const s = &h->s;
3620 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag: non-zero means drop queued output */
3621 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: -1 = keep IDR short term, 0 = long term idx 0 */
3622 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3623 if(h->mmco[0].long_arg == -1)
3626 h->mmco[0].opcode= MMCO_LONG;
3630 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3631 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3632 MMCOOpcode opcode= get_ue_golomb(gb);
3634 h->mmco[i].opcode= opcode;
/* opcodes addressing a short term pic carry difference_of_pic_nums */
3635 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3636 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3637 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3638 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
/* opcodes addressing a long term slot carry a long term index */
3642 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3643 unsigned int long_arg= get_ue_golomb(gb);
/* 16 long term slots; field coding allows pic-num-scale indices for
 * LONG2UNUSED only */
3644 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3645 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3648 h->mmco[i].long_arg= long_arg;
3651 if(opcode > (unsigned)MMCO_LONG){
3652 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3655 if(opcode == MMCO_END)
/* sliding window: if the buffer is full, synthesize an MMCO that drops
 * the oldest short term reference */
3660 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3662 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3663 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3664 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3665 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
/* field coding: unreference both fields of the oldest frame */
3667 if (FIELD_PICTURE) {
3668 h->mmco[0].short_pic_num *= 2;
3669 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3670 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Computes the picture order count (POC) of the current picture for all
 * three POC types of the H.264 spec (8.2.1) and stores the per-field and
 * frame POC values into the current picture.
 */
3681 static int init_poc(H264Context *h){
3682 MpegEncContext * const s = &h->s;
3683 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
/* frame_num_offset accumulates the frame_num wraparounds since the last IDR */
3686 if(h->nal_unit_type == NAL_IDR_SLICE){
3687 h->frame_num_offset= 0;
3689 if(h->frame_num < h->prev_frame_num)
3690 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3692 h->frame_num_offset= h->prev_frame_num_offset;
/* POC type 0: explicit LSBs in the slice header, MSBs tracked here */
3695 if(h->sps.poc_type==0){
3696 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3698 if(h->nal_unit_type == NAL_IDR_SLICE){
/* detect LSB wraparound in either direction (spec 8-3) */
3703 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3704 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3705 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3706 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3708 h->poc_msb = h->prev_poc_msb;
3709 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3711 field_poc[1] = h->poc_msb + h->poc_lsb;
3712 if(s->picture_structure == PICT_FRAME)
3713 field_poc[1] += h->delta_poc_bottom;
/* POC type 1: POC derived from frame_num and SPS cycle offsets */
3714 }else if(h->sps.poc_type==1){
3715 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3718 if(h->sps.poc_cycle_length != 0)
3719 abs_frame_num = h->frame_num_offset + h->frame_num;
/* non-reference pictures sit between the cycle anchors */
3723 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3726 expected_delta_per_poc_cycle = 0;
3727 for(i=0; i < h->sps.poc_cycle_length; i++)
3728 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
/* whole cycles plus the partial cycle up to the current frame */
3730 if(abs_frame_num > 0){
3731 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3732 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3734 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3735 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3736 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3740 if(h->nal_ref_idc == 0)
3741 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3743 field_poc[0] = expectedpoc + h->delta_poc[0];
3744 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3746 if(s->picture_structure == PICT_FRAME)
3747 field_poc[1] += h->delta_poc[1];
/* POC type 2: POC follows decoding order, derived directly from frame_num */
3750 if(h->nal_unit_type == NAL_IDR_SLICE){
3753 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3754 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
/* store per-field POCs; the frame POC is the smaller of the two fields */
3760 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3761 s->current_picture_ptr->field_poc[0]= field_poc[0];
3762 s->current_picture_ptr->poc = field_poc[0];
3764 if(s->picture_structure != PICT_TOP_FIELD) {
3765 s->current_picture_ptr->field_poc[1]= field_poc[1];
3766 s->current_picture_ptr->poc = field_poc[1];
3768 if(!FIELD_PICTURE || !s->first_field)
3769 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
3776 * initialize scan tables
3778 static void init_scan_tables(H264Context *h){
3779 MpegEncContext * const s = &h->s;
/* the C IDCT consumes coefficients in natural order; optimized IDCTs
 * use a permuted layout, so the scan tables must be permuted to match */
3781 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3782 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3783 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3785 for(i=0; i<16; i++){
/* 4x4 permutation: swap the two bit pairs of each scan position */
3786 #define T(x) (x>>2) | ((x<<2) & 0xF)
3787 h->zigzag_scan[i] = T(zigzag_scan[i]);
3788 h-> field_scan[i] = T( field_scan[i]);
3792 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3793 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3794 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3795 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3796 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3798 for(i=0; i<64; i++){
/* 8x8 permutation: swap row and column bits of each scan position */
3799 #define T(x) (x>>3) | ((x&7)<<3)
3800 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3801 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3802 h->field_scan8x8[i] = T(field_scan8x8[i]);
3803 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* lossless (transform bypass) blocks always use the unpermuted order */
3807 if(h->sps.transform_bypass){ //FIXME same ugly
3808 h->zigzag_scan_q0 = zigzag_scan;
3809 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3810 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3811 h->field_scan_q0 = field_scan;
3812 h->field_scan8x8_q0 = field_scan8x8;
3813 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3815 h->zigzag_scan_q0 = h->zigzag_scan;
3816 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3817 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3818 h->field_scan_q0 = h->field_scan;
3819 h->field_scan8x8_q0 = h->field_scan8x8;
3820 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3825 * Replicates H264 "master" context to thread contexts.
3827 static void clone_slice(H264Context *dst, H264Context *src)
3829 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3830 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3831 dst->s.current_picture = src->s.current_picture;
3832 dst->s.linesize = src->s.linesize;
3833 dst->s.uvlinesize = src->s.uvlinesize;
3834 dst->s.first_field = src->s.first_field;
3836 dst->prev_poc_msb = src->prev_poc_msb;
3837 dst->prev_poc_lsb = src->prev_poc_lsb;
3838 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3839 dst->prev_frame_num = src->prev_frame_num;
3840 dst->short_ref_count = src->short_ref_count;
3842 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3843 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3844 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3845 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3847 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3848 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3852 * decodes a slice header.
3853 * this will allso call MPV_common_init() and frame_start() as needed
3855 * @param h h264context
3856 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3858 * @return 0 if okay, <0 if an error occured, 1 if decoding must not be multithreaded
3860 static int decode_slice_header(H264Context *h, H264Context *h0){
3861 MpegEncContext * const s = &h->s;
3862 MpegEncContext * const s0 = &h0->s;
3863 unsigned int first_mb_in_slice;
3864 unsigned int pps_id;
3865 int num_ref_idx_active_override_flag;
3866 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
3867 unsigned int slice_type, tmp, i;
3868 int default_ref_list_done = 0;
3869 int last_pic_structure;
3871 s->dropable= h->nal_ref_idc == 0;
3873 first_mb_in_slice= get_ue_golomb(&s->gb);
3875 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3876 h0->current_slice = 0;
3877 if (!s0->first_field)
3878 s->current_picture_ptr= NULL;
3881 slice_type= get_ue_golomb(&s->gb);
3883 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3888 h->slice_type_fixed=1;
3890 h->slice_type_fixed=0;
3892 slice_type= slice_type_map[ slice_type ];
3893 if (slice_type == I_TYPE
3894 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3895 default_ref_list_done = 1;
3897 h->slice_type= slice_type;
3899 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3901 pps_id= get_ue_golomb(&s->gb);
3902 if(pps_id>=MAX_PPS_COUNT){
3903 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3906 if(!h0->pps_buffers[pps_id]) {
3907 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3910 h->pps= *h0->pps_buffers[pps_id];
3912 if(!h0->sps_buffers[h->pps.sps_id]) {
3913 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3916 h->sps = *h0->sps_buffers[h->pps.sps_id];
3918 if(h == h0 && h->dequant_coeff_pps != pps_id){
3919 h->dequant_coeff_pps = pps_id;
3920 init_dequant_tables(h);
3923 s->mb_width= h->sps.mb_width;
3924 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3926 h->b_stride= s->mb_width*4;
3927 h->b8_stride= s->mb_width*2;
3929 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3930 if(h->sps.frame_mbs_only_flag)
3931 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3933 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
3935 if (s->context_initialized
3936 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3938 return -1; // width / height changed during parallelized decoding
3942 if (!s->context_initialized) {
3944 return -1; // we cant (re-)initialize context during parallel decoding
3945 if (MPV_common_init(s) < 0)
3949 init_scan_tables(h);
3952 for(i = 1; i < s->avctx->thread_count; i++) {
3954 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3955 memcpy(c, h, sizeof(MpegEncContext));
3956 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3959 init_scan_tables(c);
3963 for(i = 0; i < s->avctx->thread_count; i++)
3964 if(context_init(h->thread_context[i]) < 0)
3967 s->avctx->width = s->width;
3968 s->avctx->height = s->height;
3969 s->avctx->sample_aspect_ratio= h->sps.sar;
3970 if(!s->avctx->sample_aspect_ratio.den)
3971 s->avctx->sample_aspect_ratio.den = 1;
3973 if(h->sps.timing_info_present_flag){
3974 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3975 if(h->x264_build > 0 && h->x264_build < 44)
3976 s->avctx->time_base.den *= 2;
3977 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3978 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3982 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3985 h->mb_aff_frame = 0;
3986 last_pic_structure = s0->picture_structure;
3987 if(h->sps.frame_mbs_only_flag){
3988 s->picture_structure= PICT_FRAME;
3990 if(get_bits1(&s->gb)) { //field_pic_flag
3991 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3993 s->picture_structure= PICT_FRAME;
3994 h->mb_aff_frame = h->sps.mb_aff;
3998 if(h0->current_slice == 0){
3999 /* See if we have a decoded first field looking for a pair... */
4000 if (s0->first_field) {
4001 assert(s0->current_picture_ptr);
4002 assert(s0->current_picture_ptr->data[0]);
4003 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4005 /* figure out if we have a complementary field pair */
4006 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4008 * Previous field is unmatched. Don't display it, but let it
4009 * remain for reference if marked as such.
4011 s0->current_picture_ptr = NULL;
4012 s0->first_field = FIELD_PICTURE;
4015 if (h->nal_ref_idc &&
4016 s0->current_picture_ptr->reference &&
4017 s0->current_picture_ptr->frame_num != h->frame_num) {
4019 * This and previous field were reference, but had
4020 * different frame_nums. Consider this field first in
4021 * pair. Throw away previous field except for reference
4024 s0->first_field = 1;
4025 s0->current_picture_ptr = NULL;
4028 /* Second field in complementary pair */
4029 s0->first_field = 0;
4034 /* Frame or first field in a potentially complementary pair */
4035 assert(!s0->current_picture_ptr);
4036 s0->first_field = FIELD_PICTURE;
4039 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4040 s0->first_field = 0;
4047 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4049 assert(s->mb_num == s->mb_width * s->mb_height);
4050 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4051 first_mb_in_slice >= s->mb_num){
4052 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4055 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4056 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4057 if (s->picture_structure == PICT_BOTTOM_FIELD)
4058 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4059 assert(s->mb_y < s->mb_height);
4061 if(s->picture_structure==PICT_FRAME){
4062 h->curr_pic_num= h->frame_num;
4063 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4065 h->curr_pic_num= 2*h->frame_num + 1;
4066 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4069 if(h->nal_unit_type == NAL_IDR_SLICE){
4070 get_ue_golomb(&s->gb); /* idr_pic_id */
4073 if(h->sps.poc_type==0){
4074 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4076 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4077 h->delta_poc_bottom= get_se_golomb(&s->gb);
4081 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4082 h->delta_poc[0]= get_se_golomb(&s->gb);
4084 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4085 h->delta_poc[1]= get_se_golomb(&s->gb);
4090 if(h->pps.redundant_pic_cnt_present){
4091 h->redundant_pic_count= get_ue_golomb(&s->gb);
4094 //set defaults, might be overriden a few line later
4095 h->ref_count[0]= h->pps.ref_count[0];
4096 h->ref_count[1]= h->pps.ref_count[1];
4098 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4099 if(h->slice_type == B_TYPE){
4100 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4101 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4102 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4104 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4106 if(num_ref_idx_active_override_flag){
4107 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4108 if(h->slice_type==B_TYPE)
4109 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4111 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4112 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4113 h->ref_count[0]= h->ref_count[1]= 1;
4117 if(h->slice_type == B_TYPE)
4124 if(!default_ref_list_done){
4125 fill_default_ref_list(h);
4128 if(decode_ref_pic_list_reordering(h) < 0)
4131 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4132 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4133 pred_weight_table(h);
4134 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4135 implicit_weight_table(h);
4140 decode_ref_pic_marking(h0, &s->gb);
4143 fill_mbaff_ref_list(h);
4145 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
4146 tmp = get_ue_golomb(&s->gb);
4148 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4151 h->cabac_init_idc= tmp;
4154 h->last_qscale_diff = 0;
4155 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4157 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4161 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4162 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4163 //FIXME qscale / qp ... stuff
4164 if(h->slice_type == SP_TYPE){
4165 get_bits1(&s->gb); /* sp_for_switch_flag */
4167 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4168 get_se_golomb(&s->gb); /* slice_qs_delta */
4171 h->deblocking_filter = 1;
4172 h->slice_alpha_c0_offset = 0;
4173 h->slice_beta_offset = 0;
4174 if( h->pps.deblocking_filter_parameters_present ) {
4175 tmp= get_ue_golomb(&s->gb);
4177 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4180 h->deblocking_filter= tmp;
4181 if(h->deblocking_filter < 2)
4182 h->deblocking_filter^= 1; // 1<->0
4184 if( h->deblocking_filter ) {
4185 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4186 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4190 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4191 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4192 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4193 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4194 h->deblocking_filter= 0;
4196 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4197 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4198 /* Cheat slightly for speed:
4199 Dont bother to deblock across slices */
4200 h->deblocking_filter = 2;
4202 h0->max_contexts = 1;
4203 if(!h0->single_decode_warning) {
4204 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4205 h0->single_decode_warning = 1;
4208 return 1; // deblocking switched inside frame
4213 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4214 slice_group_change_cycle= get_bits(&s->gb, ?);
4217 h0->last_slice_type = slice_type;
4218 h->slice_num = ++h0->current_slice;
4220 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4221 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4223 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4224 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4226 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4228 av_get_pict_type_char(h->slice_type),
4229 pps_id, h->frame_num,
4230 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4231 h->ref_count[0], h->ref_count[1],
4233 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4235 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4239 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
4240 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
4241 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
4243 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
4244 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
/*
 * Reads the CAVLC unary "level prefix": the number of zero bits that
 * precede the next 1 bit in the bitstream.
 * NOTE(review): this listing is incomplete -- the declarations of
 * 'buf'/'log', the TRACE #ifdef around the debug output, and the final
 * return statement/closing brace are not visible here.
 */
4253 static inline int get_level_prefix(GetBitContext *gb){
4257     OPEN_READER(re, gb);
4258     UPDATE_CACHE(re, gb);
4259     buf=GET_CACHE(re, gb);
4261     log= 32 - av_log2(buf); /* position of the leading 1 bit, counted from the MSB */
4263     print_bin(buf>>(32-log), log);
4264     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4267     LAST_SKIP_BITS(re, gb, log); /* consume the prefix zeros plus the terminating 1 */
4268     CLOSE_READER(re, gb);
/*
 * Decides whether the 8x8 transform may be used for the current
 * macroblock: each visible condition rejects sub-macroblock partitions
 * smaller than 8x8, and direct sub-blocks when direct_8x8_inference is
 * not set.
 * NOTE(review): the loop header over 'i', the return statements and the
 * closing braces are missing from this listing.
 */
4273 static inline int get_dct8x8_allowed(H264Context *h){
4276 if(!IS_SUB_8X8(h->sub_mb_type[i])
4277 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4284 * decodes a residual block (CAVLC entropy decoding).
4285 * @param n block index (CHROMA_DC_BLOCK_INDEX / LUMA_DC_BLOCK_INDEX select special VLC paths)
4286 * @param scantable scantable
4287 * @param max_coeff number of coefficients in the block
4288 * @return <0 if an error occurred
4290 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4291 MpegEncContext * const s = &h->s;
4292 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4294 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4296 //FIXME put trailing_onex into the context
/* coeff_token packs (total_coeff<<2)|trailing_ones; the VLC table used
 * depends on the predicted non-zero count of the neighbours. */
4298 if(n == CHROMA_DC_BLOCK_INDEX){
4299 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4300 total_coeff= coeff_token>>2;
4302 if(n == LUMA_DC_BLOCK_INDEX){
4303 total_coeff= pred_non_zero_count(h, 0);
4304 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4305 total_coeff= coeff_token>>2;
4307 total_coeff= pred_non_zero_count(h, n);
4308 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4309 total_coeff= coeff_token>>2;
4310 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4314 //FIXME set last_non_zero?
4318 if(total_coeff > (unsigned)max_coeff) {
4319 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4323 trailing_ones= coeff_token&3;
4324 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4325 assert(total_coeff<=16);
/* trailing ones are coded as sign bits only: 0 -> +1, 1 -> -1 */
4327 for(i=0; i<trailing_ones; i++){
4328 level[i]= 1 - 2*get_bits1(gb);
4332 int level_code, mask;
4333 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4334 int prefix= get_level_prefix(gb);
4336 //first coefficient has suffix_length equal to 0 or 1
4337 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4339 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4341 level_code= (prefix<<suffix_length); //part
4342 }else if(prefix==14){
4344 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4346 level_code= prefix + get_bits(gb, 4); //part
4347 }else if(prefix==15){
4348 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4349 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4351 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4355 if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level: even -> positive, odd -> negative */
4360 mask= -(level_code&1);
4361 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4364 //remaining coefficients have suffix_length > 0
4365 for(;i<total_coeff;i++) {
4366 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4367 prefix = get_level_prefix(gb);
4369 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4370 }else if(prefix==15){
4371 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4373 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4376 mask= -(level_code&1);
4377 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* escalate suffix_length once the decoded magnitude exceeds the limit
 * for the current length (0,5,11,23,47,95) */
4378 if(level_code > suffix_limit[suffix_length])
4383 if(total_coeff == max_coeff)
4386 if(n == CHROMA_DC_BLOCK_INDEX)
4387 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4389 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* Scatter the decoded levels back into the block in scan order.
 * NOTE(review): two variants are visible -- one storing raw levels and
 * one applying (level*qmul[j]+32)>>6 dequantization -- but the condition
 * selecting between them is missing from this listing; presumably it
 * tests qmul, verify against the full source. */
4392 coeff_num = zeros_left + total_coeff - 1;
4393 j = scantable[coeff_num];
4395 block[j] = level[0];
4396 for(i=1;i<total_coeff;i++) {
4399 else if(zeros_left < 7){
4400 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4402 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4404 zeros_left -= run_before;
4405 coeff_num -= 1 + run_before;
4406 j= scantable[ coeff_num ];
4411 block[j] = (level[0] * qmul[j] + 32)>>6;
4412 for(i=1;i<total_coeff;i++) {
4415 else if(zeros_left < 7){
4416 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4418 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4420 zeros_left -= run_before;
4421 coeff_num -= 1 + run_before;
4422 j= scantable[ coeff_num ];
4424 block[j]= (level[i] * qmul[j] + 32)>>6;
4429 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/*
 * Predicts the MBAFF field decoding flag for a skipped macroblock from
 * the left neighbour if it belongs to the same slice, otherwise from the
 * top neighbour.
 * NOTE(review): the final ": 0;" arm of the conditional chain and the
 * closing brace are missing from this listing.
 */
4436 static void predict_field_decoding_flag(H264Context *h){
4437 MpegEncContext * const s = &h->s;
4438 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4439 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4440 ? s->current_picture.mb_type[mb_xy-1]
4441 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4442 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4444 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4448 * decodes a P_SKIP or B_SKIP macroblock
/* Fills the caches with predicted motion (direct prediction for B skip,
 * P-skip motion prediction otherwise), writes the motion back and marks
 * the macroblock as skipped.
 * NOTE(review): this listing is missing several lines (the mb_type
 * initialisation, MV declarations, else branch and closing braces). */
4450 static void decode_mb_skip(H264Context *h){
4451 MpegEncContext * const s = &h->s;
4452 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4455 memset(h->non_zero_count[mb_xy], 0, 16); /* a skipped MB has no residual */
4456 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4459 mb_type|= MB_TYPE_INTERLACED;
4461 if( h->slice_type == B_TYPE )
4463 // just for fill_caches. pred_direct_motion will set the real mb_type
4464 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4466 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4467 pred_direct_motion(h, &mb_type);
4468 mb_type|= MB_TYPE_SKIP;
4473 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4475 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4476 pred_pskip_motion(h, &mx, &my);
4477 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4478 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4481 write_back_motion(h, mb_type);
4482 s->current_picture.mb_type[mb_xy]= mb_type;
4483 s->current_picture.qscale_table[mb_xy]= s->qscale;
4484 h->slice_table[ mb_xy ]= h->slice_num;
4485 h->prev_mb_skipped= 1;
4489 * decodes a macroblock using CAVLC entropy coding
4490 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): this listing has many missing lines (else branches,
 * declarations, returns and closing braces); only the visible lines are
 * annotated below. */
4492 static int decode_mb_cavlc(H264Context *h){
4493 MpegEncContext * const s = &h->s;
4494 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4495 int partition_count;
4496 unsigned int mb_type, cbp;
4497 int dct8x8_allowed= h->pps.transform_8x8_mode;
4499 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4501 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4502 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* ---- mb_skip_run handling (P/B slices only) ---- */
4504 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
4505 if(s->mb_skip_run==-1)
4506 s->mb_skip_run= get_ue_golomb(&s->gb);
4508 if (s->mb_skip_run--) {
4509 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4510 if(s->mb_skip_run==0)
4511 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4513 predict_field_decoding_flag(h);
4520 if( (s->mb_y&1) == 0 )
4521 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4523 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4525 h->prev_mb_skipped= 0;
/* ---- macroblock type; I-type indices are offset after P/B types ---- */
4527 mb_type= get_ue_golomb(&s->gb);
4528 if(h->slice_type == B_TYPE){
4530 partition_count= b_mb_type_info[mb_type].partition_count;
4531 mb_type= b_mb_type_info[mb_type].type;
4534 goto decode_intra_mb;
4536 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
4538 partition_count= p_mb_type_info[mb_type].partition_count;
4539 mb_type= p_mb_type_info[mb_type].type;
4542 goto decode_intra_mb;
4545 assert(h->slice_type == I_TYPE);
4548 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4552 cbp= i_mb_type_info[mb_type].cbp;
4553 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4554 mb_type= i_mb_type_info[mb_type].type;
4558 mb_type |= MB_TYPE_INTERLACED;
4560 h->slice_table[ mb_xy ]= h->slice_num;
/* ---- IPCM: raw 8-bit samples, bitstream is byte aligned first ---- */
4562 if(IS_INTRA_PCM(mb_type)){
4565 // We assume these blocks are very rare so we do not optimize it.
4566 align_get_bits(&s->gb);
4568 // The pixels are stored in the same order as levels in h->mb array.
4569 for(y=0; y<16; y++){
4570 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4571 for(x=0; x<16; x++){
4572 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4573 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4577 const int index= 256 + 4*(y&3) + 32*(y>>2);
4579 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4580 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4584 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4586 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4587 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4591 // In deblocking, the quantizer is 0
4592 s->current_picture.qscale_table[mb_xy]= 0;
4593 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4594 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4595 // All coeffs are present
4596 memset(h->non_zero_count[mb_xy], 16, 16);
4598 s->current_picture.mb_type[mb_xy]= mb_type;
/* NOTE(review): ref_count is doubled here and halved again at the end of
 * the function -- presumably an MBAFF field-reference adjustment whose
 * guarding condition is missing from this listing; verify upstream. */
4603 h->ref_count[0] <<= 1;
4604 h->ref_count[1] <<= 1;
4607 fill_caches(h, mb_type, 0);
/* ---- intra prediction modes ---- */
4610 if(IS_INTRA(mb_type)){
4612 // init_top_left_availability(h);
4613 if(IS_INTRA4x4(mb_type)){
4616 if(dct8x8_allowed && get_bits1(&s->gb)){
4617 mb_type |= MB_TYPE_8x8DCT;
4621 // fill_intra4x4_pred_table(h);
4622 for(i=0; i<16; i+=di){
4623 int mode= pred_intra_mode(h, i);
4625 if(!get_bits1(&s->gb)){
4626 const int rem_mode= get_bits(&s->gb, 3);
4627 mode = rem_mode + (rem_mode >= mode); /* skip over the predicted mode */
4631 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4633 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4635 write_back_intra_pred_mode(h);
4636 if( check_intra4x4_pred_mode(h) < 0)
4639 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4640 if(h->intra16x16_pred_mode < 0)
4644 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4647 h->chroma_pred_mode= pred_mode;
/* ---- 8x8 sub-partitions (P_8x8 / B_8x8) ---- */
4648 }else if(partition_count==4){
4649 int i, j, sub_partition_count[4], list, ref[2][4];
4651 if(h->slice_type == B_TYPE){
4653 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4654 if(h->sub_mb_type[i] >=13){
4655 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4658 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4659 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4661 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4662 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4663 pred_direct_motion(h, &mb_type);
4664 h->ref_cache[0][scan8[4]] =
4665 h->ref_cache[1][scan8[4]] =
4666 h->ref_cache[0][scan8[12]] =
4667 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4670 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
4672 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4673 if(h->sub_mb_type[i] >=4){
4674 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4677 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4678 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4682 for(list=0; list<h->list_count; list++){
4683 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4685 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4686 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4687 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4689 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4701 dct8x8_allowed = get_dct8x8_allowed(h);
4703 for(list=0; list<h->list_count; list++){
4705 if(IS_DIRECT(h->sub_mb_type[i])) {
4706 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4709 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4710 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4712 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4713 const int sub_mb_type= h->sub_mb_type[i];
4714 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4715 for(j=0; j<sub_partition_count[i]; j++){
4717 const int index= 4*i + block_width*j;
4718 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4719 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4720 mx += get_se_golomb(&s->gb); /* MVs are coded as deltas against the prediction */
4721 my += get_se_golomb(&s->gb);
4722 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4724 if(IS_SUB_8X8(sub_mb_type)){
4726 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4728 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4729 }else if(IS_SUB_8X4(sub_mb_type)){
4730 mv_cache[ 1 ][0]= mx;
4731 mv_cache[ 1 ][1]= my;
4732 }else if(IS_SUB_4X8(sub_mb_type)){
4733 mv_cache[ 8 ][0]= mx;
4734 mv_cache[ 8 ][1]= my;
4736 mv_cache[ 0 ][0]= mx;
4737 mv_cache[ 0 ][1]= my;
4740 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4746 }else if(IS_DIRECT(mb_type)){
4747 pred_direct_motion(h, &mb_type);
4748 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* ---- whole-MB inter partitions: 16x16, 16x8, 8x16 ---- */
4750 int list, mx, my, i;
4751 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4752 if(IS_16X16(mb_type)){
4753 for(list=0; list<h->list_count; list++){
4755 if(IS_DIR(mb_type, 0, list)){
4756 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4757 if(val >= h->ref_count[list]){
4758 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4762 val= LIST_NOT_USED&0xFF;
4763 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4765 for(list=0; list<h->list_count; list++){
4767 if(IS_DIR(mb_type, 0, list)){
4768 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4769 mx += get_se_golomb(&s->gb);
4770 my += get_se_golomb(&s->gb);
4771 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4773 val= pack16to32(mx,my);
4776 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4779 else if(IS_16X8(mb_type)){
4780 for(list=0; list<h->list_count; list++){
4783 if(IS_DIR(mb_type, i, list)){
4784 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4785 if(val >= h->ref_count[list]){
4786 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4790 val= LIST_NOT_USED&0xFF;
4791 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4794 for(list=0; list<h->list_count; list++){
4797 if(IS_DIR(mb_type, i, list)){
4798 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4799 mx += get_se_golomb(&s->gb);
4800 my += get_se_golomb(&s->gb);
4801 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4803 val= pack16to32(mx,my);
4806 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4810 assert(IS_8X16(mb_type));
4811 for(list=0; list<h->list_count; list++){
4814 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4815 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4816 if(val >= h->ref_count[list]){
4817 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4821 val= LIST_NOT_USED&0xFF;
4822 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4825 for(list=0; list<h->list_count; list++){
4828 if(IS_DIR(mb_type, i, list)){
4829 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4830 mx += get_se_golomb(&s->gb);
4831 my += get_se_golomb(&s->gb);
4832 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4834 val= pack16to32(mx,my);
4837 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4843 if(IS_INTER(mb_type))
4844 write_back_motion(h, mb_type);
/* ---- coded block pattern ---- */
4846 if(!IS_INTRA16x16(mb_type)){
4847 cbp= get_ue_golomb(&s->gb);
4849 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4853 if(IS_INTRA4x4(mb_type))
4854 cbp= golomb_to_intra4x4_cbp[cbp];
4856 cbp= golomb_to_inter_cbp[cbp];
4860 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4861 if(get_bits1(&s->gb))
4862 mb_type |= MB_TYPE_8x8DCT;
4864 s->current_picture.mb_type[mb_xy]= mb_type;
/* ---- residual decoding ---- */
4866 if(cbp || IS_INTRA16x16(mb_type)){
4867 int i8x8, i4x4, chroma_idx;
4869 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4870 const uint8_t *scan, *scan8x8, *dc_scan;
4872 // fill_non_zero_count_cache(h);
4874 if(IS_INTERLACED(mb_type)){
4875 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4876 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4877 dc_scan= luma_dc_field_scan;
4879 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4880 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4881 dc_scan= luma_dc_zigzag_scan;
4884 dquant= get_se_golomb(&s->gb);
4886 if( dquant > 25 || dquant < -26 ){
4887 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4891 s->qscale += dquant;
4892 if(((unsigned)s->qscale) > 51){ /* wrap QP back into [0,51] */
4893 if(s->qscale<0) s->qscale+= 52;
4894 else s->qscale-= 52;
4897 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4898 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4899 if(IS_INTRA16x16(mb_type)){
4900 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4901 return -1; //FIXME continue if partitioned and other return -1 too
4904 assert((cbp&15) == 0 || (cbp&15) == 15);
4907 for(i8x8=0; i8x8<4; i8x8++){
4908 for(i4x4=0; i4x4<4; i4x4++){
4909 const int index= i4x4 + 4*i8x8;
4910 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4916 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4919 for(i8x8=0; i8x8<4; i8x8++){
4920 if(cbp & (1<<i8x8)){
4921 if(IS_8x8DCT(mb_type)){
4922 DCTELEM *buf = &h->mb[64*i8x8];
4924 for(i4x4=0; i4x4<4; i4x4++){
4925 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4926 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4929 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4930 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4932 for(i4x4=0; i4x4<4; i4x4++){
4933 const int index= i4x4 + 4*i8x8;
4935 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4941 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4942 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4948 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4949 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4955 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4956 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4957 for(i4x4=0; i4x4<4; i4x4++){
4958 const int index= 16 + 4*chroma_idx + i4x4;
4959 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4965 uint8_t * const nnz= &h->non_zero_count_cache[0];
4966 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4967 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4970 uint8_t * const nnz= &h->non_zero_count_cache[0];
4971 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4972 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4973 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4975 s->current_picture.qscale_table[mb_xy]= s->qscale;
4976 write_back_non_zero_count(h);
4979 h->ref_count[0] >>= 1; /* undo the earlier <<= 1 */
4980 h->ref_count[1] >>= 1;
/*
 * CABAC decode of mb_field_decoding_flag: the context (0..2) counts how
 * many of the left/top neighbouring MB pairs in the same slice are
 * field-coded.
 * NOTE(review): the "ctx++;" bodies of the two if statements and the
 * closing brace are missing from this listing.
 */
4986 static int decode_cabac_field_decoding_flag(H264Context *h) {
4987 MpegEncContext * const s = &h->s;
4988 const int mb_x = s->mb_x;
4989 const int mb_y = s->mb_y & ~1; /* top MB of the current MBAFF pair */
4990 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4991 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4993 unsigned int ctx = 0;
4995 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4998 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5002 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/*
 * CABAC decode of an intra macroblock type starting at context ctx_base:
 * returns 0 for I4x4, 25 for PCM, or 1..24 for an I16x16 variant encoding
 * (cbp_luma, cbp_chroma, pred_mode).
 * NOTE(review): several lines (mb_type declaration, ctx computation
 * increments, braces, final return) are missing from this listing.
 */
5005 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5006 uint8_t *state= &h->cabac_state[ctx_base];
5010 MpegEncContext * const s = &h->s;
5011 const int mba_xy = h->left_mb_xy[0];
5012 const int mbb_xy = h->top_mb_xy;
5014 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5016 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5018 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5019 return 0; /* I4x4 */
5022 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5023 return 0; /* I4x4 */
5026 if( get_cabac_terminate( &h->cabac ) )
5027 return 25; /* PCM */
5029 mb_type = 1; /* I16x16 */
5030 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5031 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5032 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5033 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5034 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/*
 * CABAC decode of the macroblock type for I, P and B slices; intra types
 * inside P/B slices are delegated to decode_cabac_intra_mb_type() with an
 * offset.
 * NOTE(review): declarations ('bits', 'ctx'), some branch bodies and the
 * closing braces/final return are missing from this listing.
 */
5038 static int decode_cabac_mb_type( H264Context *h ) {
5039 MpegEncContext * const s = &h->s;
5041 if( h->slice_type == I_TYPE ) {
5042 return decode_cabac_intra_mb_type(h, 3, 1);
5043 } else if( h->slice_type == P_TYPE ) {
5044 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5046 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5047 /* P_L0_D16x16, P_8x8 */
5048 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5050 /* P_L0_D8x16, P_L0_D16x8 */
5051 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5054 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5056 } else if( h->slice_type == B_TYPE ) {
5057 const int mba_xy = h->left_mb_xy[0];
5058 const int mbb_xy = h->top_mb_xy;
/* context counts non-direct neighbours in the same slice */
5062 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5064 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5067 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5068 return 0; /* B_Direct_16x16 */
5070 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5071 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5074 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5075 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5076 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5077 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5079 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5080 else if( bits == 13 ) {
5081 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5082 } else if( bits == 14 )
5083 return 11; /* B_L1_L0_8x16 */
5084 else if( bits == 15 )
5085 return 22; /* B_8x8 */
5087 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5088 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5090 /* TODO SI/SP frames? */
/*
 * CABAC decode of mb_skip_flag; the context (0..2, +13 for B slices)
 * counts non-skipped left/top neighbours in the same slice. In MBAFF mode
 * the neighbour addresses are adjusted for field/frame pairing.
 * NOTE(review): the mba_xy/mbb_xy declarations, some condition lines and
 * closing braces are missing from this listing.
 */
5095 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5096 MpegEncContext * const s = &h->s;
5100 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5101 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5104 && h->slice_table[mba_xy] == h->slice_num
5105 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5106 mba_xy += s->mb_stride;
5108 mbb_xy = mb_xy - s->mb_stride;
5110 && h->slice_table[mbb_xy] == h->slice_num
5111 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5112 mbb_xy -= s->mb_stride;
5114 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5116 int mb_xy = mb_x + mb_y*s->mb_stride;
5118 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5121 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5123 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5126 if( h->slice_type == B_TYPE )
5128 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/*
 * CABAC decode of an intra4x4 prediction mode: one bit selects the
 * predicted mode, otherwise a 3-bit fixed code gives the remaining mode
 * (values >= pred_mode are shifted up by one to skip the prediction).
 * NOTE(review): the 'mode' declaration, the early return for the
 * predicted case and the trailing return/braces are missing here.
 */
5131 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5134 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5137 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5138 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5139 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5141 if( mode >= pred_mode )
/*
 * CABAC decode of the chroma prediction mode (0..3) using a truncated
 * unary binarization; the first bin's context counts neighbours with a
 * non-zero chroma_pred_mode.
 * NOTE(review): the 'ctx' declaration, the returns for each unary stage
 * and the closing braces are missing from this listing.
 */
5147 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5148 const int mba_xy = h->left_mb_xy[0];
5149 const int mbb_xy = h->top_mb_xy;
5153 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5154 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5157 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5160 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5163 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5165 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/*
 * CABAC decode of the 4-bit luma coded-block-pattern; each bit's context
 * depends on the corresponding already-decoded bits of this MB and the
 * cbp of the left/top neighbours (-1 when outside the slice, which makes
 * the !(cbp & ...) tests evaluate as "coded").
 * NOTE(review): the final return statement and closing brace are missing
 * from this listing.
 */
5171 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5172 int cbp_b, cbp_a, ctx, cbp = 0;
5174 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5175 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5177 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5178 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5179 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5180 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5181 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5182 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5183 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5184 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/*
 * CABAC decode of the chroma coded-block-pattern (0: none, 1: DC only,
 * 2: DC+AC); contexts depend on the neighbours' chroma cbp bits.
 * NOTE(review): the cbp_a/cbp_b/ctx declarations, the "return 0;" for
 * the first branch, the ctx reset and the closing brace are missing from
 * this listing.
 */
5187 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5191 cbp_a = (h->left_cbp>>4)&0x03;
5192 cbp_b = (h-> top_cbp>>4)&0x03;
5195 if( cbp_a > 0 ) ctx++;
5196 if( cbp_b > 0 ) ctx += 2;
5197 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5201 if( cbp_a == 2 ) ctx++;
5202 if( cbp_b == 2 ) ctx += 2;
5203 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/*
 * CABAC decode of mb_qp_delta (unary binarization, value capped at 102
 * to avoid an infinite loop on corrupt data); the result is unfolded
 * into a signed delta.
 * NOTE(review): declarations of 'ctx'/'val', the loop body, the positive
 * return path and the closing brace are missing from this listing.
 */
5205 static int decode_cabac_mb_dqp( H264Context *h) {
5209 if( h->last_qscale_diff != 0 ) /* first-bin context depends on the previous MB's dqp */
5212 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5218 if(val > 102) //prevent infinite loop
5225 return -(val + 1)/2;
/*
 * CABAC decode of a P-slice sub-macroblock type (tree over contexts
 * 21..23).
 * NOTE(review): the return statements for each leaf and the closing
 * brace are missing from this listing.
 */
5227 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5228 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5230 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5232 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/*
 * CABAC decode of a B-slice sub-macroblock type (tree over contexts
 * 36..39; context 39 is reused for all trailing bins).
 * NOTE(review): the 'type' declaration/initialisation, an intermediate
 * "type=" line, the final return and the closing brace are missing from
 * this listing.
 */
5236 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5238 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5239 return 0; /* B_Direct_8x8 */
5240 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5241 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5243 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5244 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5245 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5248 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5249 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/*
 * CABAC decode of transform_size_8x8_flag; the context offset is the
 * number of neighbouring MBs using the 8x8 transform
 * (h->neighbor_transform_size).
 * NOTE(review): the closing brace is missing from this listing.
 */
5253 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5254 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/* Decode ref_idx for block n of the given list with CABAC (states 54+ctx).
 * Context derives from the left/top cached ref indices; in B slices a
 * neighbour coded as direct does not count as a >0 reference.
 * Unary decode is clamped at 32 to survive corrupt streams. */
5257 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5258 int refa = h->ref_cache[list][scan8[n] - 1];
5259 int refb = h->ref_cache[list][scan8[n] - 8];
5263 if( h->slice_type == B_TYPE) {
/* direct-coded neighbours are excluded from the context derivation */
5264 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5266 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5275 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5281 if(ref >= 32 /*h->ref_list[list]*/){
5282 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5283 return 0; //FIXME we should return -1 and check the return everywhere
/* Decode one motion-vector-difference component (l: 0=x, 1=y) for block n.
 * Context base 40 (x) or 47 (y); the first bin's context comes from the
 * sum of the neighbours' |mvd| (amvd). Values >= 9 continue with an
 * exp-Golomb style bypass suffix, and the sign is a bypass bin. */
5289 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5290 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5291 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5292 int ctxbase = (l == 0) ? 40 : 47;
5297 else if( amvd > 32 )
5302 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
5307 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* bypass-coded exponential suffix; overflow is reported, not fatal */
5315 while( get_cabac_bypass( &h->cabac ) ) {
5319 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5324 if( get_cabac_bypass( &h->cabac ) )
/* final bypass bin is the sign */
5328 return get_cabac_bypass_sign( &h->cabac, -mvd );
/* Compute the coded_block_flag context for block category `cat` and block
 * index `idx`: nza/nzb are the left/top neighbour "nonzero" indicators,
 * taken from cached CBP bits (DC cases) or the non_zero_count cache
 * (AC/luma4x4/chroma AC cases). Final context is ctx + 4*cat. */
5331 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
/* cat 0: luma DC — bit 8 of the cached neighbour CBP */
5336 nza = h->left_cbp&0x100;
5337 nzb = h-> top_cbp&0x100;
5338 } else if( cat == 1 || cat == 2 ) {
/* cat 1/2: luma AC / luma 4x4 — neighbour nnz from the cache */
5339 nza = h->non_zero_count_cache[scan8[idx] - 1];
5340 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5341 } else if( cat == 3 ) {
/* cat 3: chroma DC — per-plane bit (6+idx) of the neighbour CBP */
5342 nza = (h->left_cbp>>(6+idx))&0x01;
5343 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* cat 4: chroma AC — nnz cache, chroma blocks start at scan8[16] */
5346 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5347 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5356 return ctx + 4 * cat;
/* Maps an 8x8-block scan position (0..62) to the context increment used
 * for the last_significant_coeff_flag of 8x8 residuals. */
5359 static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
5360 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5361 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5362 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5363 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/* Decode one residual block with CABAC: coded_block_flag, significance
 * map, and coefficient levels/signs, writing dequantized (or raw, when
 * qmul==NULL) coefficients into `block` in scantable order, and updating
 * the nnz / cbp caches. `cat` selects the block category (see table in
 * the body); `max_coeff` is 4/15/16/64 depending on category.
 * NOTE(review): this excerpt omits many interior lines; order of CABAC
 * bin reads is normative — do not reorder anything here. */
5366 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5367 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
/* context base tables, indexed [MB_FIELD][cat] (field MBs use a
 * separate context set) */
5368 static const int significant_coeff_flag_offset[2][6] = {
5369 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5370 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5372 static const int last_coeff_flag_offset[2][6] = {
5373 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5374 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5376 static const int coeff_abs_level_m1_offset[6] = {
5377 227+0, 227+10, 227+20, 227+30, 227+39, 426
/* per-position context increments for 8x8 significance, frame/field */
5379 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5380 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5381 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5382 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5383 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5384 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5385 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5386 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5387 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5393 int coeff_count = 0;
5396 int abslevelgt1 = 0;
5398 uint8_t *significant_coeff_ctx_base;
5399 uint8_t *last_coeff_ctx_base;
5400 uint8_t *abs_level_m1_ctx_base;
/* copy the CABAC state to the stack so the hot loops work on locals;
 * written back before every return */
5403 #define CABAC_ON_STACK
5405 #ifdef CABAC_ON_STACK
5408 cc.range = h->cabac.range;
5409 cc.low = h->cabac.low;
5410 cc.bytestream= h->cabac.bytestream;
5412 #define CC &h->cabac
5416 /* cat: 0-> DC 16x16 n = 0
5417 * 1-> AC 16x16 n = luma4x4idx
5418 * 2-> Luma4x4 n = luma4x4idx
5419 * 3-> DC Chroma n = iCbCr
5420 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5421 * 5-> Luma8x8 n = 4 * luma8x8idx
5424 /* read coded block flag */
5426 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
/* no coefficients: clear the nnz cache entry and bail out */
5427 if( cat == 1 || cat == 2 )
5428 h->non_zero_count_cache[scan8[n]] = 0;
5430 h->non_zero_count_cache[scan8[16+n]] = 0;
5431 #ifdef CABAC_ON_STACK
5432 h->cabac.range = cc.range ;
5433 h->cabac.low = cc.low ;
5434 h->cabac.bytestream= cc.bytestream;
5440 significant_coeff_ctx_base = h->cabac_state
5441 + significant_coeff_flag_offset[MB_FIELD][cat];
5442 last_coeff_ctx_base = h->cabac_state
5443 + last_coeff_flag_offset[MB_FIELD][cat];
5444 abs_level_m1_ctx_base = h->cabac_state
5445 + coeff_abs_level_m1_offset[cat];
/* significance map: one significant_coeff_flag per position, plus a
 * last_coeff_flag after each significant one; index[] collects the
 * scan positions of the significant coefficients */
5448 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5449 for(last= 0; last < coefs; last++) { \
5450 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5451 if( get_cabac( CC, sig_ctx )) { \
5452 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5453 index[coeff_count++] = last; \
5454 if( get_cabac( CC, last_ctx ) ) { \
5460 if( last == max_coeff -1 ) {\
5461 index[coeff_count++] = last;\
5463 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths when available, C macro fallback otherwise */
5464 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5465 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5467 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5469 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5471 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5474 assert(coeff_count > 0);
/* record nonzero status per category: cbp_table bits for DC blocks,
 * nnz cache for AC/4x4 blocks, a 2x2 fill for 8x8 luma */
5477 h->cbp_table[mb_xy] |= 0x100;
5478 else if( cat == 1 || cat == 2 )
5479 h->non_zero_count_cache[scan8[n]] = coeff_count;
5481 h->cbp_table[mb_xy] |= 0x40 << n;
5483 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5486 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* levels are decoded in reverse scan order; context depends on how
 * many levels ==1 / >1 were seen so far */
5489 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5490 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5491 int j= scantable[index[coeff_count]];
5493 if( get_cabac( CC, ctx ) == 0 ) {
/* |level| == 1: sign bypass bin, then dequant if qmul given */
5495 block[j] = get_cabac_bypass_sign( CC, -1);
5497 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
/* |level| > 1: unary up to 14, then bypass exp-Golomb suffix */
5503 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5504 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5508 if( coeff_abs >= 15 ) {
5510 while( get_cabac_bypass( CC ) ) {
5516 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5522 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5523 else block[j] = coeff_abs;
5525 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5526 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* write the stack CABAC state back into the context */
5532 #ifdef CABAC_ON_STACK
5533 h->cabac.range = cc.range ;
5534 h->cabac.low = cc.low ;
5535 h->cabac.bytestream= cc.bytestream;
/* Compute h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * Defaults are the plain raster neighbours; the MBAFF path (pair logic
 * below) adjusts them when the current/neighbour MB pairs differ in
 * frame/field coding. NOTE(review): the surrounding FRAME_MBAFF
 * conditional lines are sampled out of this excerpt. */
5540 static inline void compute_mb_neighbors(H264Context *h)
5542 MpegEncContext * const s = &h->s;
5543 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5544 h->top_mb_xy = mb_xy - s->mb_stride;
5545 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: neighbours are addressed per MB pair (mb_y & ~1) */
5547 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5548 const int top_pair_xy = pair_xy - s->mb_stride;
5549 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5550 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5551 const int curr_mb_frame_flag = !MB_FIELD;
5552 const int bottom = (s->mb_y & 1);
5554 ? !curr_mb_frame_flag // bottom macroblock
5555 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5557 h->top_mb_xy -= s->mb_stride;
5559 if (left_mb_frame_flag != curr_mb_frame_flag) {
5560 h->left_mb_xy[0] = pair_xy - 1;
5562 } else if (FIELD_PICTURE) {
5563 h->top_mb_xy -= s->mb_stride;
5569 * decodes a macroblock
5570 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* Top-level CABAC macroblock decoder: skip flag, mb_type, intra pred
 * modes or inter partitions/refs/MVs, CBP, qp delta, and all residuals.
 * The CABAC bin order below is normative; statement order matters.
 * NOTE(review): this excerpt is line-sampled — braces and some
 * statements fall in the gaps. */
5572 static int decode_mb_cabac(H264Context *h) {
5573 MpegEncContext * const s = &h->s;
5574 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5575 int mb_type, partition_count, cbp = 0;
5576 int dct8x8_allowed= h->pps.transform_8x8_mode;
5578 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5580 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- skip-flag handling (P/B slices only) --- */
5581 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
5583 /* a skipped mb needs the aff flag from the following mb */
5584 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5585 predict_field_decoding_flag(h);
5586 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5587 skip = h->next_mb_skipped;
5589 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5590 /* read skip flags */
5592 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5593 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5594 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5595 if(h->next_mb_skipped)
5596 predict_field_decoding_flag(h);
5598 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* skipped MB: reset per-MB state and (in a sampled-out line) return */
5603 h->cbp_table[mb_xy] = 0;
5604 h->chroma_pred_mode_table[mb_xy] = 0;
5605 h->last_qscale_diff = 0;
/* --- field decoding flag for non-skipped MBAFF MBs --- */
5612 if( (s->mb_y&1) == 0 )
5614 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5616 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5618 h->prev_mb_skipped = 0;
/* --- mb_type --- */
5620 compute_mb_neighbors(h);
5621 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5622 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5626 if( h->slice_type == B_TYPE ) {
5628 partition_count= b_mb_type_info[mb_type].partition_count;
5629 mb_type= b_mb_type_info[mb_type].type;
5632 goto decode_intra_mb;
5634 } else if( h->slice_type == P_TYPE ) {
5636 partition_count= p_mb_type_info[mb_type].partition_count;
5637 mb_type= p_mb_type_info[mb_type].type;
5640 goto decode_intra_mb;
5643 assert(h->slice_type == I_TYPE);
5645 partition_count = 0;
5646 cbp= i_mb_type_info[mb_type].cbp;
5647 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5648 mb_type= i_mb_type_info[mb_type].type;
5651 mb_type |= MB_TYPE_INTERLACED;
5653 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bytestream --- */
5655 if(IS_INTRA_PCM(mb_type)) {
5659 // We assume these blocks are very rare so we do not optimize it.
5660 // FIXME The two following lines get the bitstream position in the cabac
5661 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5662 ptr= h->cabac.bytestream;
5663 if(h->cabac.low&0x1) ptr--;
5665 if(h->cabac.low&0x1FF) ptr--;
5668 // The pixels are stored in the same order as levels in h->mb array.
5669 for(y=0; y<16; y++){
5670 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5671 for(x=0; x<16; x++){
5672 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5673 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5677 const int index= 256 + 4*(y&3) + 32*(y>>2);
5679 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5680 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5684 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5686 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5687 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
/* restart CABAC after the raw PCM bytes */
5691 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5693 // All blocks are present
5694 h->cbp_table[mb_xy] = 0x1ef;
5695 h->chroma_pred_mode_table[mb_xy] = 0;
5696 // In deblocking, the quantizer is 0
5697 s->current_picture.qscale_table[mb_xy]= 0;
5698 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5699 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5700 // All coeffs are present
5701 memset(h->non_zero_count[mb_xy], 16, 16);
5702 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MBs address refs per field: temporarily double counts */
5707 h->ref_count[0] <<= 1;
5708 h->ref_count[1] <<= 1;
5711 fill_caches(h, mb_type, 0);
/* --- intra prediction modes --- */
5713 if( IS_INTRA( mb_type ) ) {
5715 if( IS_INTRA4x4( mb_type ) ) {
5716 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5717 mb_type |= MB_TYPE_8x8DCT;
5718 for( i = 0; i < 16; i+=4 ) {
5719 int pred = pred_intra_mode( h, i );
5720 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5721 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5724 for( i = 0; i < 16; i++ ) {
5725 int pred = pred_intra_mode( h, i );
5726 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5728 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5731 write_back_intra_pred_mode(h);
5732 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5734 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5735 if( h->intra16x16_pred_mode < 0 ) return -1;
5737 h->chroma_pred_mode_table[mb_xy] =
5738 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5740 pred_mode= check_intra_pred_mode( h, pred_mode );
5741 if( pred_mode < 0 ) return -1;
5742 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions (sub_mb_type + refs + MVs) --- */
5743 } else if( partition_count == 4 ) {
5744 int i, j, sub_partition_count[4], list, ref[2][4];
5746 if( h->slice_type == B_TYPE ) {
5747 for( i = 0; i < 4; i++ ) {
5748 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5749 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5750 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5752 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5753 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5754 pred_direct_motion(h, &mb_type);
5755 h->ref_cache[0][scan8[4]] =
5756 h->ref_cache[1][scan8[4]] =
5757 h->ref_cache[0][scan8[12]] =
5758 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5759 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5760 for( i = 0; i < 4; i++ )
5761 if( IS_DIRECT(h->sub_mb_type[i]) )
5762 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5766 for( i = 0; i < 4; i++ ) {
5767 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5768 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5769 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices first (all 8x8 blocks), then MVDs */
5773 for( list = 0; list < h->list_count; list++ ) {
5774 for( i = 0; i < 4; i++ ) {
5775 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5776 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5777 if( h->ref_count[list] > 1 )
5778 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5784 h->ref_cache[list][ scan8[4*i]+1 ]=
5785 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5790 dct8x8_allowed = get_dct8x8_allowed(h);
5792 for(list=0; list<h->list_count; list++){
5794 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5795 if(IS_DIRECT(h->sub_mb_type[i])){
5796 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5800 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5801 const int sub_mb_type= h->sub_mb_type[i];
5802 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5803 for(j=0; j<sub_partition_count[i]; j++){
5806 const int index= 4*i + block_width*j;
5807 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5808 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5809 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5811 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5812 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5813 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate mv/mvd into the cache cells the sub-partition covers */
5815 if(IS_SUB_8X8(sub_mb_type)){
5817 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5819 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5822 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5824 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5825 }else if(IS_SUB_8X4(sub_mb_type)){
5826 mv_cache[ 1 ][0]= mx;
5827 mv_cache[ 1 ][1]= my;
5829 mvd_cache[ 1 ][0]= mx - mpx;
5830 mvd_cache[ 1 ][1]= my - mpy;
5831 }else if(IS_SUB_4X8(sub_mb_type)){
5832 mv_cache[ 8 ][0]= mx;
5833 mv_cache[ 8 ][1]= my;
5835 mvd_cache[ 8 ][0]= mx - mpx;
5836 mvd_cache[ 8 ][1]= my - mpy;
5838 mv_cache[ 0 ][0]= mx;
5839 mv_cache[ 0 ][1]= my;
5841 mvd_cache[ 0 ][0]= mx - mpx;
5842 mvd_cache[ 0 ][1]= my - mpy;
5845 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5846 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5847 p[0] = p[1] = p[8] = p[9] = 0;
5848 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_Direct 16x16 --- */
5852 } else if( IS_DIRECT(mb_type) ) {
5853 pred_direct_motion(h, &mb_type);
5854 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5855 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5856 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter 16x16 / 16x8 / 8x16 --- */
5858 int list, mx, my, i, mpx, mpy;
5859 if(IS_16X16(mb_type)){
5860 for(list=0; list<h->list_count; list++){
5861 if(IS_DIR(mb_type, 0, list)){
5862 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5863 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5865 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5867 for(list=0; list<h->list_count; list++){
5868 if(IS_DIR(mb_type, 0, list)){
5869 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5871 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5872 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5873 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5875 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5876 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5878 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5881 else if(IS_16X8(mb_type)){
5882 for(list=0; list<h->list_count; list++){
5884 if(IS_DIR(mb_type, i, list)){
5885 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5886 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5888 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5891 for(list=0; list<h->list_count; list++){
5893 if(IS_DIR(mb_type, i, list)){
5894 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5895 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5896 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5897 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5899 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5900 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5902 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5903 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5908 assert(IS_8X16(mb_type));
5909 for(list=0; list<h->list_count; list++){
5911 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5912 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5913 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5915 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5918 for(list=0; list<h->list_count; list++){
5920 if(IS_DIR(mb_type, i, list)){
5921 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5922 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5923 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5925 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5926 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5927 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5929 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5930 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5937 if( IS_INTER( mb_type ) ) {
5938 h->chroma_pred_mode_table[mb_xy] = 0;
5939 write_back_motion( h, mb_type );
/* --- coded_block_pattern (explicit for everything but I16x16) --- */
5942 if( !IS_INTRA16x16( mb_type ) ) {
5943 cbp = decode_cabac_mb_cbp_luma( h );
5944 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5947 h->cbp_table[mb_xy] = h->cbp = cbp;
5949 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5950 if( decode_cabac_mb_transform_size( h ) )
5951 mb_type |= MB_TYPE_8x8DCT;
5953 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals --- */
5955 if( cbp || IS_INTRA16x16( mb_type ) ) {
5956 const uint8_t *scan, *scan8x8, *dc_scan;
5957 const uint32_t *qmul;
5960 if(IS_INTERLACED(mb_type)){
5961 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5962 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5963 dc_scan= luma_dc_field_scan;
5965 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5966 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5967 dc_scan= luma_dc_zigzag_scan;
5970 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5971 if( dqp == INT_MIN ){
5972 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* wrap qscale into [0,51] as the spec's mod-52 arithmetic requires */
5976 if(((unsigned)s->qscale) > 51){
5977 if(s->qscale<0) s->qscale+= 52;
5978 else s->qscale-= 52;
5980 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5981 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5983 if( IS_INTRA16x16( mb_type ) ) {
5985 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5986 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5989 qmul = h->dequant4_coeff[0][s->qscale];
5990 for( i = 0; i < 16; i++ ) {
5991 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5992 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5995 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5999 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6000 if( cbp & (1<<i8x8) ) {
6001 if( IS_8x8DCT(mb_type) ) {
6002 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6003 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6005 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6006 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6007 const int index = 4*i8x8 + i4x4;
6008 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6010 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6011 //STOP_TIMER("decode_residual")
6015 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6016 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6023 for( c = 0; c < 2; c++ ) {
6024 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6025 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6031 for( c = 0; c < 2; c++ ) {
6032 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6033 for( i = 0; i < 4; i++ ) {
6034 const int index = 16 + 4 * c + i;
6035 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6036 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6040 uint8_t * const nnz= &h->non_zero_count_cache[0];
6041 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6042 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* cbp==0 and not I16x16: clear all nnz and reset qp-delta history */
6045 uint8_t * const nnz= &h->non_zero_count_cache[0];
6046 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6047 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6048 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6049 h->last_qscale_diff = 0;
6052 s->current_picture.qscale_table[mb_xy]= s->qscale;
6053 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling from above */
6056 h->ref_count[0] >>= 1;
6057 h->ref_count[1] >>= 1;
/* Deblock one vertical luma edge (16 pixels tall). For bS<4 the work is
 * delegated to the DSP loop filter with per-4-line tc0 values; bS==4
 * (intra edge) runs the strong filter in C. alpha/beta thresholds come
 * from the spec tables indexed by qp + slice offsets. */
6064 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6066 const int index_a = qp + h->slice_alpha_c0_offset;
6067 const int alpha = (alpha_table+52)[index_a];
6068 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc[i] = -1 signals "no filtering" for that 4-line group */
6073 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6074 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6076 /* 16px edge length, because bS=4 is triggered by being at
6077 * the edge of an intra MB, so all 4 bS are the same */
6078 for( d = 0; d < 16; d++ ) {
6079 const int p0 = pix[-1];
6080 const int p1 = pix[-2];
6081 const int p2 = pix[-3];
6083 const int q0 = pix[0];
6084 const int q1 = pix[1];
6085 const int q2 = pix[2];
6087 if( FFABS( p0 - q0 ) < alpha &&
6088 FFABS( p1 - p0 ) < beta &&
6089 FFABS( q1 - q0 ) < beta ) {
/* strong filter applies only for small p0-q0 step */
6091 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6092 if( FFABS( p2 - p0 ) < beta)
6094 const int p3 = pix[-4];
6096 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6097 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6098 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6101 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6103 if( FFABS( q2 - q0 ) < beta)
6105 const int q3 = pix[3];
6107 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6108 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6109 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6112 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* weak fallback: only p0/q0 are adjusted */
6116 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6117 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6119 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblock one vertical chroma edge: DSP normal filter for bS<4
 * (tc0+1 per the chroma rule, 0 = skip), DSP intra filter for bS==4. */
6125 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6127 const int index_a = qp + h->slice_alpha_c0_offset;
6128 const int alpha = (alpha_table+52)[index_a];
6129 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6134 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6135 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6137 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock a vertical luma edge between MBAFF macroblock pairs, one row
 * at a time: each of the 16 rows may use a different bS (8 entries) and
 * one of two qp values, so alpha/beta/tc are recomputed per row. */
6141 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6143 for( i = 0; i < 16; i++, pix += stride) {
6149 int bS_index = (i >> 1);
6152 bS_index |= (i & 1);
6155 if( bS[bS_index] == 0 ) {
/* qp selection differs between field (top/bottom half) and frame
 * (alternating rows) coding of the current MB */
6159 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6160 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6161 alpha = (alpha_table+52)[index_a];
6162 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* normal filter, bS 1..3: clipped delta on p0/q0, optional p1/q1 */
6164 if( bS[bS_index] < 4 ) {
6165 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6166 const int p0 = pix[-1];
6167 const int p1 = pix[-2];
6168 const int p2 = pix[-3];
6169 const int q0 = pix[0];
6170 const int q1 = pix[1];
6171 const int q2 = pix[2];
6173 if( FFABS( p0 - q0 ) < alpha &&
6174 FFABS( p1 - p0 ) < beta &&
6175 FFABS( q1 - q0 ) < beta ) {
6179 if( FFABS( p2 - p0 ) < beta ) {
6180 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6183 if( FFABS( q2 - q0 ) < beta ) {
6184 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6188 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6189 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6190 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6191 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* strong filter, bS == 4 (intra edge) */
6194 const int p0 = pix[-1];
6195 const int p1 = pix[-2];
6196 const int p2 = pix[-3];
6198 const int q0 = pix[0];
6199 const int q1 = pix[1];
6200 const int q2 = pix[2];
6202 if( FFABS( p0 - q0 ) < alpha &&
6203 FFABS( p1 - p0 ) < beta &&
6204 FFABS( q1 - q0 ) < beta ) {
6206 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6207 if( FFABS( p2 - p0 ) < beta)
6209 const int p3 = pix[-4];
6211 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6212 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6213 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6216 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6218 if( FFABS( q2 - q0 ) < beta)
6220 const int q3 = pix[3];
6222 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6223 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6224 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6227 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6231 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6232 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6234 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock a vertical chroma edge between MBAFF macroblock pairs, one
 * row at a time (8 rows): per-row bS and qp selection, chroma rule
 * (only p0/q0 adjusted; tc = tc0+1). */
6239 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6241 for( i = 0; i < 8; i++, pix += stride) {
6249 if( bS[bS_index] == 0 ) {
/* field MBs split qp by top/bottom half, frame MBs alternate rows */
6253 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6254 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6255 alpha = (alpha_table+52)[index_a];
6256 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6258 if( bS[bS_index] < 4 ) {
6259 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6260 const int p0 = pix[-1];
6261 const int p1 = pix[-2];
6262 const int q0 = pix[0];
6263 const int q1 = pix[1];
6265 if( FFABS( p0 - q0 ) < alpha &&
6266 FFABS( p1 - p0 ) < beta &&
6267 FFABS( q1 - q0 ) < beta ) {
6268 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6270 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6271 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6272 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS == 4: chroma strong filter (unconditional averaging form) */
6275 const int p0 = pix[-1];
6276 const int p1 = pix[-2];
6277 const int q0 = pix[0];
6278 const int q1 = pix[1];
6280 if( FFABS( p0 - q0 ) < alpha &&
6281 FFABS( p1 - p0 ) < beta &&
6282 FFABS( q1 - q0 ) < beta ) {
6284 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6285 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6286 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblock one horizontal luma edge (16 pixels wide). Mirrors
 * filter_mb_edgev with pixel offsets expressed in strides: DSP filter
 * for bS<4, C strong filter for bS==4. */
6292 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6294 const int index_a = qp + h->slice_alpha_c0_offset;
6295 const int alpha = (alpha_table+52)[index_a];
6296 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6297 const int pix_next = stride;
6302 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6303 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6305 /* 16px edge length, see filter_mb_edgev */
6306 for( d = 0; d < 16; d++ ) {
6307 const int p0 = pix[-1*pix_next];
6308 const int p1 = pix[-2*pix_next];
6309 const int p2 = pix[-3*pix_next];
6310 const int q0 = pix[0];
6311 const int q1 = pix[1*pix_next];
6312 const int q2 = pix[2*pix_next];
6314 if( FFABS( p0 - q0 ) < alpha &&
6315 FFABS( p1 - p0 ) < beta &&
6316 FFABS( q1 - q0 ) < beta ) {
6318 const int p3 = pix[-4*pix_next];
6319 const int q3 = pix[ 3*pix_next];
6321 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6322 if( FFABS( p2 - p0 ) < beta) {
6324 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6325 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6326 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6329 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6331 if( FFABS( q2 - q0 ) < beta) {
6333 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6334 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6335 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6338 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6342 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6343 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6345 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblock one horizontal chroma edge: vertical-direction DSP chroma
 * filters, normal (tc0+1, 0 = skip) for bS<4, intra variant for bS==4. */
6352 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6354 const int index_a = qp + h->slice_alpha_c0_offset;
6355 const int alpha = (alpha_table+52)[index_a];
6356 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6361 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6362 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6364 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast path of the per-macroblock deblocking filter.
 * Handles the common case (no MBAFF, no per-MB chroma QP difference, not on
 * the picture border); anything it cannot handle is delegated to the general
 * filter_mb().
 * NOTE(review): interior lines of this function are missing from this view
 * (several declarations, else/closing braces); comments describe only the
 * visible logic. */
6368 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6369 MpegEncContext * const s = &h->s;
6371 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6373 mb_xy = mb_x + mb_y*s->mb_stride;
/* fall back to the general filter for border MBs, missing dsp helper,
 * per-MB chroma QP differences, or cross-slice filtering disabled */
6375 if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6376 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6377 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6378 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6381 assert(!FRAME_MBAFF);
6383 mb_type = s->current_picture.mb_type[mb_xy];
6384 qp = s->current_picture.qscale_table[mb_xy];
6385 qp0 = s->current_picture.qscale_table[mb_xy-1];
6386 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6387 qpc = get_chroma_qp( h, 0, qp );
6388 qpc0 = get_chroma_qp( h, 0, qp0 );
6389 qpc1 = get_chroma_qp( h, 0, qp1 );
/* edge QPs are the rounded average of the two neighbouring MBs' QPs */
6390 qp0 = (qp + qp0 + 1) >> 1;
6391 qp1 = (qp + qp1 + 1) >> 1;
6392 qpc0 = (qpc + qpc0 + 1) >> 1;
6393 qpc1 = (qpc + qpc1 + 1) >> 1;
/* below this QP threshold the filter provably changes nothing: skip the MB */
6394 qp_thresh = 15 - h->slice_alpha_c0_offset;
6395 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6396 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* intra MBs use fixed boundary strengths: 4 on the MB edge (3 in field
 * pictures for the horizontal edge), 3 on internal edges */
6399 if( IS_INTRA(mb_type) ) {
6400 int16_t bS4[4] = {4,4,4,4};
6401 int16_t bS3[4] = {3,3,3,3};
6402 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only every second internal edge is filtered */
6403 if( IS_8x8DCT(mb_type) ) {
6404 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6405 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6406 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6407 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6409 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6410 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6411 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6412 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6413 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6414 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6415 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6416 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6418 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6419 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6420 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6421 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6422 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6423 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6424 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6425 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* inter MB: compute boundary strengths with the dsp helper, then filter.
 * bSv aliases bS so a whole edge (4 x int16) can be set/tested as one u64 */
6428 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6429 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6431 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6433 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge*: how often mv-based bS must be rechecked between edges */
6435 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6436 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6437 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6438 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6440 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6441 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6442 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6443 (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
/* intra neighbours force maximum strength on the shared MB edge */
6445 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6446 bSv[0][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6447 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6448 bSv[1][0] = 0x0004000400040004ULL;
/* filter one edge in direction dir (0=vertical, 1=horizontal) iff any of
 * its four bS values is non-zero; edge 0 uses the averaged neighbour QP */
6450 #define FILTER(hv,dir,edge)\
6451 if(bSv[dir][edge]) {\
6452 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6454 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6455 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6461 } else if( IS_8x8DCT(mb_type) ) {
/* General (slow-path) deblocking filter for one macroblock.
 * Computes per-edge boundary strengths (bS) from intra flags, coded
 * coefficients and motion-vector/reference differences, then applies the
 * luma/chroma edge filters. Handles MBAFF and field/frame mixed cases that
 * filter_mb_fast() cannot.
 * NOTE(review): interior lines of this function are missing from this view;
 * comments describe only the visible logic. */
6480 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6481 MpegEncContext * const s = &h->s;
6482 const int mb_xy= mb_x + mb_y*s->mb_stride;
6483 const int mb_type = s->current_picture.mb_type[mb_xy];
/* mv difference threshold on y: 2 for interlaced MBs (field units), else 4 */
6484 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6485 int first_vertical_edge_done = 0;
6487 /* FIXME: A given frame may occupy more than one position in
6488 * the reference list. So ref2frm should be populated with
6489 * frame numbers, not indices. */
/* maps ref_cache indices (biased by +2 so -2/-1 map to the first slots) */
6490 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6491 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6493 //for sufficiently low qp, filtering wouldn't do anything
6494 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6496 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6497 int qp = s->current_picture.qscale_table[mb_xy];
6499 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6500 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
/* MBAFF special case: left neighbour pair has a different interlacing than
 * the current MB, so the first vertical edge needs 8 bS values and 2 QPs */
6506 // left mb is in picture
6507 && h->slice_table[mb_xy-1] != 255
6508 // and current and left pair do not have the same interlaced type
6509 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6510 // and left mb is in the same slice if deblocking_filter == 2
6511 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6512 /* First vertical edge is different in MBAFF frames
6513 * There are 8 different bS to compute and 2 different Qp
6515 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6516 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6521 int mb_qp, mbn0_qp, mbn1_qp;
6523 first_vertical_edge_done = 1;
6525 if( IS_INTRA(mb_type) )
6526 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6528 for( i = 0; i < 8; i++ ) {
6529 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6531 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6533 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6534 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6535 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* one luma/chroma-b/chroma-r QP per left-pair field, averaged as usual */
6542 mb_qp = s->current_picture.qscale_table[mb_xy];
6543 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6544 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6545 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6546 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6547 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6548 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6549 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6550 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6551 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6552 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6553 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6554 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6557 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6558 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6559 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6560 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6561 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6563 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6564 for( dir = 0; dir < 2; dir++ )
/* mbm_xy: the neighbour across the MB boundary in this direction */
6567 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6568 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* slice_table 255 marks "outside the picture": skip the MB-boundary edge */
6569 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6571 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6572 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6573 // how often to recheck mv-based bS when iterating between edges
6574 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6575 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6576 // how often to recheck mv-based bS when iterating along each edge
6577 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6579 if (first_vertical_edge_done) {
6581 first_vertical_edge_done = 0;
6584 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6587 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6588 && !IS_INTERLACED(mb_type)
6589 && IS_INTERLACED(mbm_type)
6591 // This is a special case in the norm where the filtering must
6592 // be done twice (one each of the field) even if we are in a
6593 // frame macroblock.
6595 static const int nnz_idx[4] = {4,5,6,3};
6596 unsigned int tmp_linesize = 2 * linesize;
6597 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6598 int mbn_xy = mb_xy - 2 * s->mb_stride;
6603 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6604 if( IS_INTRA(mb_type) ||
6605 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6606 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6608 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6609 for( i = 0; i < 4; i++ ) {
6610 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6611 mbn_nnz[nnz_idx[i]] != 0 )
6617 // Do not use s->qscale as luma quantizer because it has not the same
6618 // value in IPCM macroblocks.
6619 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6620 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6621 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6622 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6623 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6624 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6625 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6626 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: edge 0 is the MB boundary, edges 1..3 are internal */
6633 for( edge = start; edge < edges; edge++ ) {
6634 /* mbn_xy: neighbor macroblock */
6635 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6636 const int mbn_type = s->current_picture.mb_type[mbn_xy];
/* with the 8x8 transform only even internal edges carry coefficients */
6640 if( (edge&1) && IS_8x8DCT(mb_type) )
6643 if( IS_INTRA(mb_type) ||
6644 IS_INTRA(mbn_type) ) {
6647 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6648 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6657 bS[0] = bS[1] = bS[2] = bS[3] = value;
/* inter path: bS from coded coefficients and mv/ref differences */
6662 if( edge & mask_edge ) {
6663 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6666 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6667 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6670 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6671 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6672 int bn_idx= b_idx - (dir ? 8:1);
6674 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6675 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6676 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6677 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6679 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: evaluate each of the four 4-pixel sub-edges separately */
6685 for( i = 0; i < 4; i++ ) {
6686 int x = dir == 0 ? edge : i;
6687 int y = dir == 0 ? i : edge;
6688 int b_idx= 8 + 4 + x + 8*y;
6689 int bn_idx= b_idx - (dir ? 8:1);
6691 if( h->non_zero_count_cache[b_idx] != 0 ||
6692 h->non_zero_count_cache[bn_idx] != 0 ) {
6698 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6699 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6700 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6701 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6709 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6714 // Do not use s->qscale as luma quantizer because it has not the same
6715 // value in IPCM macroblocks.
6716 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6717 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6718 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6719 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* vertical edges; chroma only exists on even luma edges (4:2:0) */
6721 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6722 if( (edge&1) == 0 ) {
6723 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6724 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6725 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6726 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* horizontal edges */
6729 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6730 if( (edge&1) == 0 ) {
6731 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6732 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6733 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6734 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode one slice: the CABAC path, the CAVLC path, and a disabled simple
 * MB loop. Reports decoded regions to the error concealment via
 * ff_er_add_slice().
 * Fix: the line originally numbered 6899 contained mis-encoded characters
 * ("s->?gb" / "s->gb?.size_in_bits"); restored to "s->gb" / "s->gb.size_in_bits"
 * to match the identical constructs on the surrounding lines (6887/6900).
 * NOTE(review): interior lines of this function are missing from this view;
 * comments describe only the visible logic. */
6741 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6742 MpegEncContext * const s = &h->s;
/* in partitioned frames only AC end/error flags are meaningful */
6743 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6747 if( h->pps.cabac ) {
/* CABAC data starts byte-aligned after the slice header */
6751 align_get_bits( &s->gb );
6754 ff_init_cabac_states( &h->cabac);
6755 ff_init_cabac_decoder( &h->cabac,
6756 s->gb.buffer + get_bits_count(&s->gb)/8,
6757 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6758 /* calculate pre-state */
/* initialize the 460 context states from the QP-dependent init tables */
6759 for( i= 0; i < 460; i++ ) {
6761 if( h->slice_type == I_TYPE )
6762 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6764 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6767 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6769 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6774 int ret = decode_mb_cabac(h);
6776 //STOP_TIMER("decode_mb_cabac")
6778 if(ret>=0) hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the pair as well */
6780 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6783 if(ret>=0) ret = decode_mb_cabac(h);
6785 if(ret>=0) hl_decode_mb(h);
6788 eos = get_cabac_terminate( &h->cabac );
/* tolerate up to 2 bytes of over-read caused by the CABAC bytestream */
6790 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6791 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6792 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6796 if( ++s->mb_x >= s->mb_width ) {
6798 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6800 if(FIELD_OR_MBAFF_PICTURE) {
6805 if( eos || s->mb_y >= s->mb_height ) {
6806 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6807 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6814 int ret = decode_mb_cavlc(h);
6816 if(ret>=0) hl_decode_mb(h);
6818 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6820 ret = decode_mb_cavlc(h);
6822 if(ret>=0) hl_decode_mb(h);
6827 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6833 if(++s->mb_x >= s->mb_width){
6835 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6837 if(FIELD_OR_MBAFF_PICTURE) {
6840 if(s->mb_y >= s->mb_height){
6841 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* exact end of bitstream => clean slice end, otherwise mark as error */
6843 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6844 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6848 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6855 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6856 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6857 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6858 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6862 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* disabled simple MB loop (dead code in the original) */
6871 for(;s->mb_y < s->mb_height; s->mb_y++){
6872 for(;s->mb_x < s->mb_width; s->mb_x++){
6873 int ret= decode_mb(h);
6878 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6879 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6884 if(++s->mb_x >= s->mb_width){
6886 if(++s->mb_y >= s->mb_height){
6887 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6888 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6892 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6899 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6900 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6901 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6905 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6912 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6915 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of the given byte size.
 * Copies up to sizeof(user_data)-1 bytes, then scans for an x264 version
 * string so bug workarounds can be keyed on h->x264_build.
 * NOTE(review): interior lines (declarations, NUL termination, the skip of
 * remaining payload bytes' loop header) are missing from this view. */
6918 static int decode_unregistered_user_data(H264Context *h, int size){
6919 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
6920 uint8_t user_data[16+256];
6926 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6927 user_data[i]= get_bits(&s->gb, 8);
/* payload text begins after the 16-byte UUID */
6931 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6932 if(e==1 && build>=0)
6933 h->x264_build= build;
6935 if(s->avctx->debug & FF_DEBUG_BUGS)
6936 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond what was buffered */
6939 skip_bits(&s->gb, 8);
/* Parse an SEI NAL unit: repeatedly read (type, size) pairs — each coded as
 * a run of 0xFF bytes plus a final byte — and dispatch known payload types,
 * skipping the rest.
 * NOTE(review): the declarations of type/size and the dispatch switch header
 * are missing from this view. */
6944 static int decode_sei(H264Context *h){
6945 MpegEncContext * const s = &h->s;
/* stop when fewer than 16 bits remain (not enough for another type+size) */
6947 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* type: sum of 0xFF bytes terminated by a non-0xFF byte */
6952 type+= show_bits(&s->gb, 8);
6953 }while(get_bits(&s->gb, 8) == 255);
/* size: same escape coding as type */
6957 size+= show_bits(&s->gb, 8);
6958 }while(get_bits(&s->gb, 8) == 255);
6962 if(decode_unregistered_user_data(h, size) < 0)
/* unknown payload type: skip its bytes entirely */
6966 skip_bits(&s->gb, 8*size);
6969 //FIXME check bits here
6970 align_get_bits(&s->gb);
/* Parse HRD (hypothetical reference decoder) parameters from the VUI.
 * All fields are read and discarded except as noted; sps is currently
 * unused by the visible lines. */
6976 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6977 MpegEncContext * const s = &h->s;
6979 cpb_count = get_ue_golomb(&s->gb) + 1;
6980 get_bits(&s->gb, 4); /* bit_rate_scale */
6981 get_bits(&s->gb, 4); /* cpb_size_scale */
/* one (bit_rate, cpb_size, cbr) triple per coded picture buffer */
6982 for(i=0; i<cpb_count; i++){
6983 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6984 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6985 get_bits1(&s->gb); /* cbr_flag */
6987 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6988 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6989 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6990 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse VUI (video usability information) from an SPS: sample aspect ratio,
 * overscan/video-signal/chroma-location info, timing, HRD parameters and
 * bitstream restrictions. Only SAR, timing and num_reorder_frames are stored
 * in sps by the visible lines; the rest is read and discarded. */
6993 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6994 MpegEncContext * const s = &h->s;
6995 int aspect_ratio_info_present_flag;
6996 unsigned int aspect_ratio_idc;
6997 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6999 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7001 if( aspect_ratio_info_present_flag ) {
7002 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit numerator/denominator follow */
7003 if( aspect_ratio_idc == EXTENDED_SAR ) {
7004 sps->sar.num= get_bits(&s->gb, 16);
7005 sps->sar.den= get_bits(&s->gb, 16);
/* idc 1..13 index the predefined aspect-ratio table */
7006 }else if(aspect_ratio_idc < 14){
7007 sps->sar= pixel_aspect[aspect_ratio_idc];
7009 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7016 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7018 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7019 get_bits1(&s->gb); /* overscan_appropriate_flag */
7022 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7023 get_bits(&s->gb, 3); /* video_format */
7024 get_bits1(&s->gb); /* video_full_range_flag */
7025 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7026 get_bits(&s->gb, 8); /* colour_primaries */
7027 get_bits(&s->gb, 8); /* transfer_characteristics */
7028 get_bits(&s->gb, 8); /* matrix_coefficients */
7032 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7033 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7034 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7037 sps->timing_info_present_flag = get_bits1(&s->gb);
7038 if(sps->timing_info_present_flag){
7039 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7040 sps->time_scale = get_bits_long(&s->gb, 32);
7041 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7044 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7045 if(nal_hrd_parameters_present_flag)
7046 decode_hrd_parameters(h, sps);
7047 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7048 if(vcl_hrd_parameters_present_flag)
7049 decode_hrd_parameters(h, sps);
7050 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7051 get_bits1(&s->gb); /* low_delay_hrd_flag */
7052 get_bits1(&s->gb); /* pic_struct_present_flag */
7054 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7055 if(sps->bitstream_restriction_flag){
7056 unsigned int num_reorder_frames;
7057 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7058 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7059 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7060 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7061 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7062 num_reorder_frames= get_ue_golomb(&s->gb);
7063 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity bound: DPB cannot hold more than 16 reference frames */
7065 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7066 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7070 sps->num_reorder_frames= num_reorder_frames;
/* Decode one quantization scaling list of 16 or 64 entries.
 * If the list is absent from the bitstream, copy fallback_list; if the first
 * delta yields next==0, use the JVT default list; otherwise decode delta-coded
 * values in zigzag order, repeating the last value once next becomes 0. */
7076 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7077 const uint8_t *jvt_list, const uint8_t *fallback_list){
7078 MpegEncContext * const s = &h->s;
7079 int i, last = 8, next = 8;
7080 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7081 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7082 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7084 for(i=0;i<size;i++){
/* each entry is the previous value plus a signed delta, modulo 256 */
7086 next = (last + get_se_golomb(&s->gb)) & 0xff;
7087 if(!i && !next){ /* matrix not written, we use the preset one */
7088 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 after the first entry means "repeat last value to the end" */
7091 last = factors[scan[i]] = next ? next : last;
/* Decode the full set of scaling matrices for an SPS or PPS.
 * Fallback chain per the spec: a PPS falls back to the SPS matrices when the
 * SPS provided any, otherwise to the flat/JVT defaults; within the set, each
 * list falls back to the previously decoded one of the same class. */
7095 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7096 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7097 MpegEncContext * const s = &h->s;
7098 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7099 const uint8_t *fallback[4] = {
7100 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7101 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7102 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7103 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
/* seq/pic_scaling_matrix_present_flag */
7105 if(get_bits1(&s->gb)){
7106 sps->scaling_matrix_present |= is_sps;
7107 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7108 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7109 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7110 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7111 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7112 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only in SPS or when the PPS enables the 8x8 transform */
7113 if(is_sps || pps->transform_8x8_mode){
7114 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7115 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
/* no matrices in this PPS: inherit the SPS ones wholesale */
7117 } else if(fallback_sps) {
7118 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7119 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7124 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
/* Validates id against max, allocates a zeroed entry of 'size' bytes if the
 * slot is empty, and returns the slot. 'name' is used only for error logs.
 * NOTE(review): the return-type line, range check and return statement are
 * missing from this view. */
7127 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7128 const size_t size, const char *name)
7131 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7136 vec[id] = av_mallocz(size);
7138 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
 * Covers profile/level, POC configuration, reference counts, picture
 * dimensions, interlacing flags, cropping and optional VUI.
 * NOTE(review): interior lines (some error returns, closing braces, the
 * crop-disabled else branch) are missing from this view. */
7143 static inline int decode_seq_parameter_set(H264Context *h){
7144 MpegEncContext * const s = &h->s;
7145 int profile_idc, level_idc;
7146 unsigned int sps_id, tmp, mb_width, mb_height;
7150 profile_idc= get_bits(&s->gb, 8);
7151 get_bits1(&s->gb); //constraint_set0_flag
7152 get_bits1(&s->gb); //constraint_set1_flag
7153 get_bits1(&s->gb); //constraint_set2_flag
7154 get_bits1(&s->gb); //constraint_set3_flag
7155 get_bits(&s->gb, 4); // reserved
7156 level_idc= get_bits(&s->gb, 8);
7157 sps_id= get_ue_golomb(&s->gb);
7159 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7163 sps->profile_idc= profile_idc;
7164 sps->level_idc= level_idc;
/* high-profile-only fields: chroma format, bit depths, scaling matrices */
7166 if(sps->profile_idc >= 100){ //high profile
7167 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7168 get_bits1(&s->gb); //residual_color_transform_flag
7169 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7170 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7171 sps->transform_bypass = get_bits1(&s->gb);
7172 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7174 sps->scaling_matrix_present = 0;
7176 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7177 sps->poc_type= get_ue_golomb(&s->gb);
7179 if(sps->poc_type == 0){ //FIXME #define
7180 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7181 } else if(sps->poc_type == 1){//FIXME #define
7182 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7183 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7184 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7185 tmp= get_ue_golomb(&s->gb);
/* reject cycle lengths larger than the fixed offset array */
7187 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7188 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7191 sps->poc_cycle_length= tmp;
7193 for(i=0; i<sps->poc_cycle_length; i++)
7194 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7195 }else if(sps->poc_type != 2){
7196 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7200 tmp= get_ue_golomb(&s->gb);
7201 if(tmp > MAX_PICTURE_COUNT-2){
7202 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7204 sps->ref_frame_count= tmp;
7205 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7206 mb_width= get_ue_golomb(&s->gb) + 1;
7207 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_* against overflow before the generic dimension check */
7208 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7209 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7210 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7213 sps->mb_width = mb_width;
7214 sps->mb_height= mb_height;
7216 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7217 if(!sps->frame_mbs_only_flag)
7218 sps->mb_aff= get_bits1(&s->gb);
7222 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
/* builds without interlace support can only warn about MBAFF streams */
7224 #ifndef ALLOW_INTERLACE
7226 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7228 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7229 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7231 sps->crop= get_bits1(&s->gb);
7233 sps->crop_left = get_ue_golomb(&s->gb);
7234 sps->crop_right = get_ue_golomb(&s->gb);
7235 sps->crop_top = get_ue_golomb(&s->gb);
7236 sps->crop_bottom= get_ue_golomb(&s->gb);
/* left/top cropping is only partially implemented */
7237 if(sps->crop_left || sps->crop_top){
7238 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7244 sps->crop_bottom= 0;
7247 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7248 if( sps->vui_parameters_present_flag )
7249 decode_vui_parameters(h, sps);
7251 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7252 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7253 sps_id, sps->profile_idc, sps->level_idc,
7255 sps->ref_frame_count,
7256 sps->mb_width, sps->mb_height,
7257 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7258 sps->direct_8x8_inference_flag ? "8B8" : "",
7259 sps->crop_left, sps->crop_right,
7260 sps->crop_top, sps->crop_bottom,
7261 sps->vui_parameters_present_flag ? "VUI" : ""
/* Fill chroma QP lookup table t of the PPS: entry i maps a luma QP (plus the
 * chroma_qp_index_offset 'index', clipped to 0..51) to its chroma QP.
 * NOTE(review): the loop bound 255 combined with 'i & 0xff' leaves table
 * entry 255 unwritten — confirm that index is never read (or should the
 * bound be 256?). The return-type line and braces are not visible here. */
7268 build_qp_table(PPS *pps, int t, int index)
7271 for(i = 0; i < 255; i++)
7272 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL of bit_length bits and store it in
 * h->pps_buffers[pps_id]. FMO slice-group maps are recognized but not
 * supported (the spec syntax is quoted inline). Optional trailing fields
 * (8x8 transform, PPS scaling matrices, second chroma QP offset) are read
 * only if bits remain.
 * NOTE(review): interior lines (error returns, the FMO switch cases' code,
 * closing braces) are missing from this view. */
7275 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7276 MpegEncContext * const s = &h->s;
7277 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7280 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* the referenced SPS must already have been decoded */
7284 tmp= get_ue_golomb(&s->gb);
7285 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7286 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7291 pps->cabac= get_bits1(&s->gb);
7292 pps->pic_order_present= get_bits1(&s->gb);
7293 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7294 if(pps->slice_group_count > 1 ){
7295 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7296 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7297 switch(pps->mb_slice_group_map_type){
7300 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7301 | run_length[ i ] |1 |ue(v) |
7306 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7308 | top_left_mb[ i ] |1 |ue(v) |
7309 | bottom_right_mb[ i ] |1 |ue(v) |
7317 | slice_group_change_direction_flag |1 |u(1) |
7318 | slice_group_change_rate_minus1 |1 |ue(v) |
7323 | slice_group_id_cnt_minus1 |1 |ue(v) |
7324 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7326 | slice_group_id[ i ] |1 |u(v) |
7331 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7332 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
/* at most 32 entries per reference list */
7333 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7334 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7335 pps->ref_count[0]= pps->ref_count[1]= 1;
7339 pps->weighted_pred= get_bits1(&s->gb);
7340 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7341 pps->init_qp= get_se_golomb(&s->gb) + 26;
7342 pps->init_qs= get_se_golomb(&s->gb) + 26;
7343 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7344 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7345 pps->constrained_intra_pred= get_bits1(&s->gb);
7346 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7348 pps->transform_8x8_mode= 0;
7349 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* default to flat (all 16) scaling matrices */
7350 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7351 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing fields exist only if bits remain in the RBSP */
7353 if(get_bits_count(&s->gb) < bit_length){
7354 pps->transform_8x8_mode= get_bits1(&s->gb);
7355 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7356 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7358 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7361 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
/* a second table (and the chroma_qp_diff flag) only when offsets differ */
7362 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7363 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7364 h->pps.chroma_qp_diff= 1;
7366 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7368 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7369 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7370 pps_id, pps->sps_id,
7371 pps->cabac ? "CABAC" : "CAVLC",
7372 pps->slice_group_count,
7373 pps->ref_count[0], pps->ref_count[1],
7374 pps->weighted_pred ? "weighted" : "",
7375 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7376 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7377 pps->constrained_intra_pred ? "CONSTR" : "",
7378 pps->redundant_pic_cnt_present ? "REDU" : "",
7379 pps->transform_8x8_mode ? "8x8DCT" : ""
7387 * Call decode_slice() for each context.
7389 * @param h h264 master context
7390 * @param context_count number of contexts to execute
7392 static void execute_decode_slices(H264Context *h, int context_count){
7393 MpegEncContext * const s = &h->s;
7394 AVCodecContext * const avctx= s->avctx;
     /* Fast path: a single context is decoded inline, without going
      * through avctx->execute(). */
7398 if(context_count == 1) {
7399 decode_slice(avctx, h);
     /* Multi-context path: prime each worker context with the master's
      * error-resilience setting and a clean error counter ... */
7401 for(i = 1; i < context_count; i++) {
7402 hx = h->thread_context[i];
7403 hx->s.error_resilience = avctx->error_resilience;
7404 hx->s.error_count = 0;
     /* ... then run decode_slice() on all contexts in parallel. */
7407 avctx->execute(avctx, (void *)decode_slice,
7408 (void **)h->thread_context, NULL, context_count);
7410 /* pull back stuff from slices to master context */
     /* The last context holds the final decode position/state. */
7411 hx = h->thread_context[context_count - 1];
7412 s->mb_x = hx->s.mb_x;
7413 s->mb_y = hx->s.mb_y;
7414 s->dropable = hx->s.dropable;
7415 s->picture_structure = hx->s.picture_structure;
     /* Accumulate per-thread error counts into the master context. */
7416 for(i = 1; i < context_count; i++)
7417 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split buf into NAL units and decode each one.
 * Handles both avcC-style (length-prefixed, h->is_avc) and Annex B
 * (start-code delimited) bitstreams.  Slice NALs are queued into
 * thread contexts and executed in batches via execute_decode_slices();
 * parameter-set and other NALs are decoded inline on the master context.
 * Returns the number of bytes consumed (buf_index).
 */
7422 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7423 MpegEncContext * const s = &h->s;
7424 AVCodecContext * const avctx= s->avctx;
7426 H264Context *hx; ///< thread context
7427 int context_count = 0;
7429 h->max_contexts = avctx->thread_count;
     // debug hex dump of the first bytes of the input buffer
7432 for(i=0; i<50; i++){
7433 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
     /* Unless the caller feeds arbitrary chunks (CODEC_FLAG2_CHUNKS),
      * each call starts a fresh access unit. */
7436 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7437 h->current_slice = 0;
7438 if (!s->first_field)
7439 s->current_picture_ptr= NULL;
7451 if(buf_index >= buf_size) break;
     /* avcC mode: read the big-endian NAL length prefix
      * (h->nal_length_size bytes). */
7453 for(i = 0; i < h->nal_length_size; i++)
7454 nalsize = (nalsize << 8) | buf[buf_index++];
7455 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7460 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7465 // start code prefix search
7466 for(; buf_index + 3 < buf_size; buf_index++){
7467 // This should always succeed in the first iteration.
7468 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7472 if(buf_index+3 >= buf_size) break;
7477 hx = h->thread_context[context_count];
     /* Unescape the NAL unit (strip emulation-prevention bytes). */
7479 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7480 if (ptr==NULL || dst_length < 0){
     /* Strip trailing zero bytes before locating the rbsp stop bit.
      * NOTE(review): ptr[dst_length - 1] is evaluated before the
      * dst_length > 0 guard, so dst_length == 0 reads one byte before
      * the buffer; the two conditions should be swapped. */
7483 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7485 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7487 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7488 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7491 if (h->is_avc && (nalsize != consumed))
7492 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7494 buf_index += consumed;
     /* Skip non-reference NALs when hurry_up / skip_frame asks for it. */
7496 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7497 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7502 switch(hx->nal_unit_type){
     /* IDR slice: must not be mixed with non-IDR slices in one AU. */
7504 if (h->nal_unit_type != NAL_IDR_SLICE) {
7505 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7508 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7510 init_get_bits(&hx->s.gb, ptr, bit_length);
7512 hx->inter_gb_ptr= &hx->s.gb;
7513 hx->s.data_partitioning = 0;
7515 if((err = decode_slice_header(hx, h)))
7518 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
     /* Queue the slice for decoding unless skip_frame/hurry_up or
      * redundant-picture filtering discards it. */
7519 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7520 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7521 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7522 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7523 && avctx->skip_frame < AVDISCARD_ALL)
     /* Data partition A: carries the slice header; partitions B/C
      * (intra/inter residual) arrive in separate NALs below. */
7527 init_get_bits(&hx->s.gb, ptr, bit_length);
7529 hx->inter_gb_ptr= NULL;
7530 hx->s.data_partitioning = 1;
7532 err = decode_slice_header(hx, h);
     /* Data partition B: intra residual. */
7535 init_get_bits(&hx->intra_gb, ptr, bit_length);
7536 hx->intra_gb_ptr= &hx->intra_gb;
     /* Data partition C: inter residual; once present the partitioned
      * slice can be queued for decoding. */
7539 init_get_bits(&hx->inter_gb, ptr, bit_length);
7540 hx->inter_gb_ptr= &hx->inter_gb;
7542 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7543 && s->context_initialized
7545 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7546 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
7547 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
7548 && avctx->skip_frame < AVDISCARD_ALL)
     /* SEI */
7552 init_get_bits(&s->gb, ptr, bit_length);
     /* SPS */
7556 init_get_bits(&s->gb, ptr, bit_length);
7557 decode_seq_parameter_set(h);
7559 if(s->flags& CODEC_FLAG_LOW_DELAY)
7562 if(avctx->has_b_frames < 2)
7563 avctx->has_b_frames= !s->low_delay;
     /* PPS */
7566 init_get_bits(&s->gb, ptr, bit_length);
7568 decode_picture_parameter_set(h, bit_length);
     /* NAL types that are recognized but intentionally ignored. */
7572 case NAL_END_SEQUENCE:
7573 case NAL_END_STREAM:
7574 case NAL_FILLER_DATA:
7576 case NAL_AUXILIARY_SLICE:
7579 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
     /* Flush the batch once all thread contexts are filled. */
7582 if(context_count == h->max_contexts) {
7583 execute_decode_slices(h, context_count);
7588 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7590 /* Slice could not be decoded in parallel mode, copy down
7591 * NAL unit stuff to context 0 and restart. Note that
7592 * rbsp_buffer is not transferred, but since we no longer
7593 * run in parallel mode this should not be an issue. */
7594 h->nal_unit_type = hx->nal_unit_type;
7595 h->nal_ref_idc = hx->nal_ref_idc;
     /* Decode any slices still queued at end of input. */
7601 execute_decode_slices(h, context_count);
7606 * returns the number of bytes consumed for building the current frame
/**
 * Return the number of bytes consumed for building the current frame.
 * @param pos      byte position reached by the NAL parser
 * @param buf_size size of the input buffer
 */
7608 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
     /* In truncated mode part of the data may still sit in the parse
      * context buffer; subtract what was carried over. */
7609 if(s->flags&CODEC_FLAG_TRUNCATED){
7610 pos -= s->parse_context.last_index;
7611 if(pos<0) pos=0; // FIXME remove (unneeded?)
7615 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7616 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: decode one packet of H.264 data into at most one
 * output picture.  Handles avcC extradata parsing on first use, NAL
 * decoding, and reordering of decoded pictures into display order.
 * @return number of bytes consumed, or a negative value on error
 */
7622 static int decode_frame(AVCodecContext *avctx,
7623 void *data, int *data_size,
7624 uint8_t *buf, int buf_size)
7626 H264Context *h = avctx->priv_data;
7627 MpegEncContext *s = &h->s;
7628 AVFrame *pict = data;
7631 s->flags= avctx->flags;
7632 s->flags2= avctx->flags2;
7634 /* no supplementary picture */
7635 if (buf_size == 0) {
     /* End of stream: flush the lowest-poc picture (up to the next
      * keyframe) out of the delayed-picture queue. */
7639 //FIXME factorize this with the output code below
7640 out = h->delayed_pic[0];
7642 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7643 if(h->delayed_pic[i]->poc < out->poc){
7644 out = h->delayed_pic[i];
     /* Compact the queue over the slot being output. */
7648 for(i=out_idx; h->delayed_pic[i]; i++)
7649 h->delayed_pic[i] = h->delayed_pic[i+1];
7652 *data_size = sizeof(AVFrame);
7653 *pict= *(AVFrame*)out;
     /* Truncated input: accumulate data until a full frame boundary is
      * found by the frame-end parser. */
7659 if(s->flags&CODEC_FLAG_TRUNCATED){
7660 int next= ff_h264_find_frame_end(h, buf, buf_size);
7662 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7664 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
     /* First avcC-format packet: parse the AVCDecoderConfigurationRecord
      * in extradata (SPS/PPS sets + NAL length size). */
7667 if(h->is_avc && !h->got_avcC) {
7668 int i, cnt, nalsize;
7669 unsigned char *p = avctx->extradata;
7670 if(avctx->extradata_size < 7) {
7671 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7675 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7678 /* sps and pps in the avcC always have length coded with 2 bytes,
7679 so put a fake nal_length_size = 2 while parsing them */
7680 h->nal_length_size = 2;
7681 // Decode sps from avcC
7682 cnt = *(p+5) & 0x1f; // Number of sps
7684 for (i = 0; i < cnt; i++) {
7685 nalsize = AV_RB16(p) + 2;
     /* NOTE(review): the sps loop accepts any non-negative return while
      * the pps loop below requires an exact nalsize match — presumably
      * these checks were meant to be consistent; verify upstream. */
7686 if(decode_nal_units(h, p, nalsize) < 0) {
7687 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7692 // Decode pps from avcC
7693 cnt = *(p++); // Number of pps
7694 for (i = 0; i < cnt; i++) {
7695 nalsize = AV_RB16(p) + 2;
7696 if(decode_nal_units(h, p, nalsize) != nalsize) {
7697 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7702 // Now store right nal length size, that will be used to parse all other nals
7703 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7704 // Do not reparse avcC
     /* Annex B extradata (if any) is decoded once, before the first frame. */
7708 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7709 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7713 buf_index=decode_nal_units(h, buf, buf_size);
7717 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7718 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7719 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
     /* A full picture has been decoded (or the final chunk of one in
      * CHUNKS mode): finish it and run display-order reordering. */
7723 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7724 Picture *out = s->current_picture_ptr;
7725 Picture *cur = s->current_picture_ptr;
7726 Picture *prev = h->delayed_output_pic;
7727 int i, pics, cross_idr, out_of_order, out_idx;
7731 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7732 s->current_picture_ptr->pict_type= s->pict_type;
     /* Save POC/frame_num state for decoding the next picture, then
      * apply reference-picture marking (MMCO / sliding window). */
7734 h->prev_frame_num_offset= h->frame_num_offset;
7735 h->prev_frame_num= h->frame_num;
7737 h->prev_poc_msb= h->poc_msb;
7738 h->prev_poc_lsb= h->poc_lsb;
7739 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7743 * FIXME: Error handling code does not seem to support interlaced
7744 * when slices span multiple rows
7745 * The ff_er_add_slice calls don't work right for bottom
7746 * fields; they cause massive erroneous error concealing
7747 * Error marking covers both fields (top and bottom).
7748 * This causes a mismatched s->error_count
7749 * and a bad error table. Further, the error count goes to
7750 * INT_MAX when called for bottom field, because mb_y is
7751 * past end by one (callers fault) and resync_mb_y != 0
7752 * causes problems for the first MB line, too.
7759 if (s->first_field) {
7760 /* Wait for second field. */
7764 //FIXME do something with unavailable reference frames
7766 #if 0 //decode order
7767 *data_size = sizeof(AVFrame);
7769 /* Sort B-frames into display order */
     /* Grow the reorder delay up to the stream-declared maximum. */
7771 if(h->sps.bitstream_restriction_flag
7772 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7773 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7778 while(h->delayed_pic[pics]) pics++;
7780 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
     /* Append the current picture; mark it as held for delayed output so
      * its buffers are not recycled while queued. */
7782 h->delayed_pic[pics++] = cur;
7783 if(cur->reference == 0)
7784 cur->reference = DELAYED_PIC_REF;
7787 for(i=0; h->delayed_pic[i]; i++)
7788 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
     /* Choose the lowest-poc queued picture (up to the next keyframe)
      * as the output candidate. */
7791 out = h->delayed_pic[0];
7793 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7794 if(h->delayed_pic[i]->poc < out->poc){
7795 out = h->delayed_pic[i];
7799 out_of_order = !cross_idr && prev && out->poc < prev->poc;
     /* Heuristically grow has_b_frames when the stream turns out to need
      * more reorder delay than currently assumed. */
7800 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7802 else if(prev && pics <= s->avctx->has_b_frames)
7804 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7806 ((!cross_idr && prev && out->poc > prev->poc + 2)
7807 || cur->pict_type == B_TYPE)))
7810 s->avctx->has_b_frames++;
7813 else if(out_of_order)
     /* Emit the candidate once enough pictures are queued (or when
      * out-of-order output is unavoidable). */
7816 if(out_of_order || pics > s->avctx->has_b_frames){
7817 for(i=out_idx; h->delayed_pic[i]; i++)
7818 h->delayed_pic[i] = h->delayed_pic[i+1];
7824 *data_size = sizeof(AVFrame);
     /* Release the previously output picture's delayed-output hold. */
7825 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7826 prev->reference = 0;
7827 h->delayed_output_pic = out;
7831 *pict= *(AVFrame*)out;
7833 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7837 assert(pict->data[0] || !*data_size);
7838 ff_print_debug_info(s, pict);
7839 //printf("out %d\n", (int)pict->data[0]);
7842 /* Return the Picture timestamp as the frame number */
7843 /* we subtract 1 because it is added on utils.c */
7844 avctx->frame_number = s->picture_number - 1;
7846 return get_consumed_bytes(s, buf_index, buf_size);
/**
 * Fill h->mb_avail[] with the availability of the macroblocks that
 * neighbour the current one (same-slice check via slice_table):
 * [0]=top-left, [1]=top, [2]=top-right, [3]=left; [4]/[5] are fixed.
 */
7849 static inline void fill_mb_avail(H264Context *h){
7850 MpegEncContext * const s = &h->s;
7851 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     /* Top row of neighbours: available only when in the same slice
      * (and, for the corners, when not at the left/right picture edge). */
7854 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7855 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7856 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
     /* Left neighbour. */
7862 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7863 h->mb_avail[4]= 1; //FIXME move out
7864 h->mb_avail[5]= 0; //FIXME move out
/* Self-test harness (built only under #ifdef TEST): exercises the
 * Exp-Golomb coders, the 4x4 (I)DCT round-trip, the quantizer and the
 * NAL escape/unescape layer with pseudo-random data. */
7871 #define SIZE (COUNT*40)
7877 // int int_temp[10000];
7879 AVCodecContext avctx;
7881 dsputil_init(&dsp, &avctx);
     /* --- unsigned Exp-Golomb: write COUNT codes, read them back. --- */
7883 init_put_bits(&pb, temp, SIZE);
7884 printf("testing unsigned exp golomb\n");
7885 for(i=0; i<COUNT; i++){
7887 set_ue_golomb(&pb, i);
7888 STOP_TIMER("set_ue_golomb");
7890 flush_put_bits(&pb);
7892 init_get_bits(&gb, temp, 8*SIZE);
7893 for(i=0; i<COUNT; i++){
7896 s= show_bits(&gb, 24);
7899 j= get_ue_golomb(&gb);
7901 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7904 STOP_TIMER("get_ue_golomb");
     /* --- signed Exp-Golomb: same round-trip over [-COUNT/2, COUNT/2). --- */
7908 init_put_bits(&pb, temp, SIZE);
7909 printf("testing signed exp golomb\n");
7910 for(i=0; i<COUNT; i++){
7912 set_se_golomb(&pb, i - COUNT/2);
7913 STOP_TIMER("set_se_golomb");
7915 flush_put_bits(&pb);
7917 init_get_bits(&gb, temp, 8*SIZE);
7918 for(i=0; i<COUNT; i++){
7921 s= show_bits(&gb, 24);
7924 j= get_se_golomb(&gb);
7925 if(j != i - COUNT/2){
7926 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7929 STOP_TIMER("get_se_golomb");
     /* --- 4x4 DCT/IDCT round-trip on random blocks; accumulate the
      *     reconstruction error against the source. --- */
7932 printf("testing 4x4 (I)DCT\n");
7935 uint8_t src[16], ref[16];
7936 uint64_t error= 0, max_error=0;
7938 for(i=0; i<COUNT; i++){
7940 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7941 for(j=0; j<16; j++){
7942 ref[j]= random()%255;
7943 src[j]= random()%255;
7946 h264_diff_dct_c(block, src, ref, 4);
     /* Scale coefficients to match the IDCT's expected normalization. */
7949 for(j=0; j<16; j++){
7950 // printf("%d ", block[j]);
7951 block[j]= block[j]*4;
7952 if(j&1) block[j]= (block[j]*4 + 2)/5;
7953 if(j&4) block[j]= (block[j]*4 + 2)/5;
7957 s->dsp.h264_idct_add(ref, block, 4);
7958 /* for(j=0; j<16; j++){
7959 printf("%d ", ref[j]);
7963 for(j=0; j<16; j++){
7964 int diff= FFABS(src[j] - ref[j]);
7967 max_error= FFMAX(max_error, diff);
7970 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7972 printf("testing quantizer\n");
7973 for(qp=0; qp<52; qp++){
7975 src1_block[i]= src2_block[i]= random()%255;
     /* --- NAL layer: escape a random bitstream with injected zero runs,
      *     then unescape and compare with the original. --- */
7979 printf("Testing NAL layer\n");
7981 uint8_t bitstream[COUNT];
7982 uint8_t nal[COUNT*2];
7984 memset(&h, 0, sizeof(H264Context));
7986 for(i=0; i<COUNT; i++){
7994 for(j=0; j<COUNT; j++){
7995 bitstream[j]= (random() % 255) + 1;
7998 for(j=0; j<zeros; j++){
7999 int pos= random() % COUNT;
8000 while(bitstream[pos] == 0){
8009 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8011 printf("encoding failed\n");
8015 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8019 if(out_length != COUNT){
8020 printf("incorrect length %d %d\n", out_length, COUNT);
8024 if(consumed != nal_length){
8025 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8029 if(memcmp(bitstream, out, COUNT)){
8030 printf("mismatch\n");
8035 printf("Testing RBSP\n");
/**
 * Decoder close callback: free the per-context RBSP scratch buffers and
 * the decoder's lookup tables.
 */
8043 static int decode_end(AVCodecContext *avctx)
8045 H264Context *h = avctx->priv_data;
8046 MpegEncContext *s = &h->s;
     /* av_freep() frees and NULLs the pointers, so a double close is safe. */
8048 av_freep(&h->rbsp_buffer[0]);
8049 av_freep(&h->rbsp_buffer[1]);
8050 free_tables(h); //FIXME cleanup init stuff perhaps
8053 // memset(h, 0, sizeof(H264Context));
8059 AVCodec h264_decoder = {
8063 sizeof(H264Context),
8068 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,