]> rtime.felk.cvut.cz Git - frescor/ffmpeg.git/blob - libavcodec/mpegvideo.c
Fix ;;
[frescor/ffmpeg.git] / libavcodec / mpegvideo.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard.
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file mpegvideo.c
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "avcodec.h"
31 #include "dsputil.h"
32 #include "mpegvideo.h"
33 #include "mpegvideo_common.h"
34 #include "mjpegenc.h"
35 #include "msmpeg4.h"
36 #include "faandct.h"
37 #include <limits.h>
38
39 //#undef NDEBUG
40 //#include <assert.h>
41
42 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
43                                    DCTELEM *block, int n, int qscale);
44 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
45                                    DCTELEM *block, int n, int qscale);
46 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
47                                    DCTELEM *block, int n, int qscale);
48 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
49                                    DCTELEM *block, int n, int qscale);
50 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51                                    DCTELEM *block, int n, int qscale);
52 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53                                   DCTELEM *block, int n, int qscale);
54 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55                                   DCTELEM *block, int n, int qscale);
56 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57
/* XvMC hardware-acceleration hooks; implemented in a separate file */
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
extern void XVMC_field_end(MpegEncContext *s);
extern void XVMC_decode_mb(MpegEncContext *s);

/* edge-replication hook, defaults to the C implementation below;
   NOTE(review): being a writable global, it can be repointed to an
   optimized version — override site not visible in this file */
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
63
64
65 /* enable all paranoid tests for rounding, overflows, etc... */
66 //#define PARANOID
67
68 //#define DEBUG
69
70
/* default luma->chroma qscale mapping: the identity, i.e. chroma uses the
   same quantizer as luma unless a codec installs its own table */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
75
76 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
77     int i;
78     int end;
79
80     st->scantable= src_scantable;
81
82     for(i=0; i<64; i++){
83         int j;
84         j = src_scantable[i];
85         st->permutated[i] = permutation[j];
86 #ifdef ARCH_POWERPC
87         st->inverse[j] = i;
88 #endif
89     }
90
91     end=-1;
92     for(i=0; i<64; i++){
93         int j;
94         j = st->permutated[i];
95         if(j>end) end=j;
96         st->raster_end[i]= end;
97     }
98 }
99
/**
 * Scans [p, end) for an MPEG start code (a 00 00 01 xx byte sequence).
 * *state carries the most recent bytes across calls so a start code that
 * straddles two buffers is still detected.
 * @return pointer just past the start code, or end if none was found
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* feed up to 3 bytes through the carried state, in case a start code
       spans the previous buffer and this one */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end) // tmp==0x100: the 3 bytes before p were 00 00 01
            return p;
    }

    /* fast scan: inspect bytes at strides chosen so no 00 00 01 can be
       skipped; loop exits with p one past a complete start-code prefix */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; // p[-1] can be neither 00 nor 01 of a code
        else if(p[-2]          ) p+= 2; // p[-2] nonzero: no code ends at p
        else if(p[-3]|(p[-1]-1)) p++;   // not exactly 00 00 01 ending at p
        else{
            p++;
            break;
        }
    }

    /* reload the carried state with the last 4 bytes before the result */
    p= FFMIN(p, end)-4;
    *state= AV_RB32(p);

    return p+4;
}
129
/**
 * Initializes the DCT/IDCT related parts of the context, shared by encoder
 * and decoder: unquantize function pointers, arch-specific overrides, and
 * the permutated scan tables.
 * @return 0 (never fails)
 */
int ff_dct_common_init(MpegEncContext *s)
{
    /* default C unquantizers; arch-specific init below may replace them */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

    /* exactly one arch-specific init is compiled in, chosen at build time */
#if defined(HAVE_MMX)
    MPV_common_init_mmx(s);
#elif defined(ARCH_ALPHA)
    MPV_common_init_axp(s);
#elif defined(HAVE_MLIB)
    MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
    MPV_common_init_mmi(s);
#elif defined(ARCH_ARMV4L)
    MPV_common_init_armv4l(s);
#elif defined(HAVE_ALTIVEC)
    MPV_common_init_altivec(s);
#elif defined(ARCH_BFIN)
    MPV_common_init_bfin(s);
#endif

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
173
174 void copy_picture(Picture *dst, Picture *src){
175     *dst = *src;
176     dst->type= FF_BUFFER_TYPE_COPY;
177 }
178
/**
 * Allocates a Picture.
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * @param shared if nonzero, pic->data[] must already be valid and no buffer
 *               is requested from the application
 * @return 0 on success, -1 on failure; partially allocated side tables are
 *         freed by MPV_common_end()/free_picture(), the get_buffer() buffer
 *         is released here on failure
 */
int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) does not sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;
    int r= -1;

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        /* sanity-check what the application's get_buffer() returned */
        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* side tables are allocated once per Picture and kept across reuse */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            /* encoder-only per-macroblock statistics */
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , (big_mb_num + s->mb_stride) * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + 2*s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion vectors at 4x4-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs (and MV visualization) use 8x8-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    /* It might be nicer if the application would keep track of these
     * but it would require an API change. */
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway.

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    if(r>=0)
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    return -1;
}
267
/**
 * Deallocates a picture: releases the application buffer (unless the data
 * is shared) and frees all side tables allocated by alloc_picture().
 * Safe on a partially allocated or already-freed Picture (av_freep on NULL
 * fields is a no-op).
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* shared pictures don't own their pixel buffers, so don't release them */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; // pointed into mb_type_base, now dangling
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* for shared pictures, just drop the (not owned) pointers */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
300
/**
 * Allocates the per-thread scratch buffers of an MpegEncContext.
 * NOTE(review): base is unused in this function body; presumably kept for
 * signature symmetry with the callers — confirm before removing.
 * @return 0 on success, -1 on allocation failure (cleanup is done by
 *         MPV_common_end() via free_duplicate_context())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;

     //FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads alias one allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        /* motion-estimation hash map and score map (encoder only) */
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
330
331 static void free_duplicate_context(MpegEncContext *s){
332     if(s==NULL) return;
333
334     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
335     av_freep(&s->me.scratchpad);
336     s->rd_scratchpad=
337     s->b_scratchpad=
338     s->obmc_scratchpad= NULL;
339
340     av_freep(&s->dct_error_sum);
341     av_freep(&s->me.map);
342     av_freep(&s->me.score_map);
343     av_freep(&s->blocks);
344     s->block= NULL;
345 }
346
/**
 * Copies the thread-private fields of src into bak, so that a whole-struct
 * memcpy of an MpegEncContext can be undone for exactly these fields
 * (see ff_update_duplicate_context()).
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
368
/**
 * Updates a per-thread context dst from the main context src: copies the
 * whole struct, then restores dst's thread-private fields (scratch buffers,
 * mb range, bit writer, ...) and re-derives the pblocks pointers so they
 * point into dst's own block storage.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
382
383 /**
384  * sets the given MpegEncContext to common defaults (same for encoding and decoding).
385  * the changed fields will not depend upon the prior state of the MpegEncContext.
386  */
387 void MPV_common_defaults(MpegEncContext *s){
388     s->y_dc_scale_table=
389     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
390     s->chroma_qscale_table= ff_default_chroma_qscale_table;
391     s->progressive_frame= 1;
392     s->progressive_sequence= 1;
393     s->picture_structure= PICT_FRAME;
394
395     s->coded_picture_number = 0;
396     s->picture_number = 0;
397     s->input_picture_number = 0;
398
399     s->picture_in_gop_number = 0;
400
401     s->f_code = 1;
402     s->b_code = 1;
403 }
404
/**
 * Sets the given MpegEncContext to defaults for decoding.
 * The changed fields will not depend upon the prior state of the
 * MpegEncContext. Currently identical to the common defaults; decoder-only
 * defaults would go here.
 */
void MPV_decode_defaults(MpegEncContext *s){
    MPV_common_defaults(s);
}
412
/**
 * Initializes the common (encoder+decoder) structure.
 * Assumes that width/height (and for encoding, the relevant avctx fields)
 * are already set. Allocates all per-context tables and the per-thread
 * duplicate contexts.
 * @return 0 on success, -1 on failure (everything allocated so far is
 *         freed via MPV_common_end())
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y, threads;

    s->mb_height = (s->height + 15) / 16;

    /* each thread needs at least one macroblock row to work on */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    ff_dct_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* derived geometry; the +1 strides leave a guard column for
       out-of-picture predictors */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overridden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* wraps (strides) for the 6 blocks of a macroblock: 4 luma + 2 chroma */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->codec_tag=          toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->stream_codec_tag=          toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mapping from linear mb index to (x + y*mb_stride) position */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error resilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the working pointers skip the first (guard) row and column */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantization matrices, one entry per qscale (32) per coefficient (64) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024; // neutral DC predictor
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    /* extra planes used only when visualizing QP / MB types / MVs */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread context 0 is the main context itself; the others are copies */
    s->thread_context[0]= s;
    threads = s->avctx->thread_count;

    for(i=1; i<threads; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give every thread its own scratch buffers and mb row range */
    for(i=0; i<threads; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
607
608 /* init common structure for both encoder and decoder */
609 void MPV_common_end(MpegEncContext *s)
610 {
611     int i, j, k;
612
613     for(i=0; i<s->avctx->thread_count; i++){
614         free_duplicate_context(s->thread_context[i]);
615     }
616     for(i=1; i<s->avctx->thread_count; i++){
617         av_freep(&s->thread_context[i]);
618     }
619
620     av_freep(&s->parse_context.buffer);
621     s->parse_context.buffer_size=0;
622
623     av_freep(&s->mb_type);
624     av_freep(&s->p_mv_table_base);
625     av_freep(&s->b_forw_mv_table_base);
626     av_freep(&s->b_back_mv_table_base);
627     av_freep(&s->b_bidir_forw_mv_table_base);
628     av_freep(&s->b_bidir_back_mv_table_base);
629     av_freep(&s->b_direct_mv_table_base);
630     s->p_mv_table= NULL;
631     s->b_forw_mv_table= NULL;
632     s->b_back_mv_table= NULL;
633     s->b_bidir_forw_mv_table= NULL;
634     s->b_bidir_back_mv_table= NULL;
635     s->b_direct_mv_table= NULL;
636     for(i=0; i<2; i++){
637         for(j=0; j<2; j++){
638             for(k=0; k<2; k++){
639                 av_freep(&s->b_field_mv_table_base[i][j][k]);
640                 s->b_field_mv_table[i][j][k]=NULL;
641             }
642             av_freep(&s->b_field_select_table[i][j]);
643             av_freep(&s->p_field_mv_table_base[i][j]);
644             s->p_field_mv_table[i][j]=NULL;
645         }
646         av_freep(&s->p_field_select_table[i]);
647     }
648
649     av_freep(&s->dc_val_base);
650     av_freep(&s->ac_val_base);
651     av_freep(&s->coded_block_base);
652     av_freep(&s->mbintra_table);
653     av_freep(&s->cbp_table);
654     av_freep(&s->pred_dir_table);
655
656     av_freep(&s->mbskip_table);
657     av_freep(&s->prev_pict_types);
658     av_freep(&s->bitstream_buffer);
659     s->allocated_bitstream_buffer_size=0;
660
661     av_freep(&s->avctx->stats_out);
662     av_freep(&s->ac_stats);
663     av_freep(&s->error_status_table);
664     av_freep(&s->mb_index2xy);
665     av_freep(&s->lambda_table);
666     av_freep(&s->q_intra_matrix);
667     av_freep(&s->q_inter_matrix);
668     av_freep(&s->q_intra_matrix16);
669     av_freep(&s->q_inter_matrix16);
670     av_freep(&s->input_picture);
671     av_freep(&s->reordered_input_picture);
672     av_freep(&s->dct_offset);
673
674     if(s->picture){
675         for(i=0; i<MAX_PICTURE_COUNT; i++){
676             free_picture(s, &s->picture[i]);
677         }
678     }
679     av_freep(&s->picture);
680     s->context_initialized = 0;
681     s->last_picture_ptr=
682     s->next_picture_ptr=
683     s->current_picture_ptr= NULL;
684     s->linesize= s->uvlinesize= 0;
685
686     for(i=0; i<3; i++)
687         av_freep(&s->visualization_buffer[i]);
688
689     avcodec_default_free_buffers(s->avctx);
690 }
691
/**
 * Initializes the derived run/level tables of an RLTable:
 * max_level[run], max_run[level] and index_run[run], once for the
 * "not last" half of the table and once for the "last" half.
 * @param static_store if non-NULL, storage for the two passes
 *                     (2*MAX_RUN + MAX_LEVEL + 3 bytes each); otherwise the
 *                     tables are allocated with av_malloc()
 */
void init_rl(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3])
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* If table is static, we can quit if rl->max_level[0] is not NULL */
    if(static_store && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for(last=0;last<2;last++) {
        /* pass 0 covers the "not last coefficient" codes, pass 1 the rest */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        /* index_run entries left at rl->n mean "no code for this run" */
        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);
        for(i=start;i<end;i++) {
            run = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n) // first code with this run
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        /* publish each table either into the caller's static storage
           (packed: max_level, max_run, index_run) or a fresh allocation */
        if(static_store)
            rl->max_level[last] = static_store[last];
        else
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        if(static_store)
            rl->max_run[last] = static_store[last] + MAX_RUN + 1;
        else
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        if(static_store)
            rl->index_run[last] = static_store[last] + MAX_RUN + MAX_LEVEL + 2;
        else
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
742
/**
 * Builds the combined run/level/length VLC tables (rl_vlc) of an RLTable,
 * one table per qscale 0..31 with the dequantization (level*qmul+qadd)
 * folded in, so decoding needs a single table lookup.
 * @param use_static use the static allocator and skip the work if the
 *                   tables were already built
 */
void init_vlc_rl(RLTable *rl, int use_static)
{
    int i, q;

    /* Return if static table is already initialized */
    if(use_static && rl->rl_vlc[0])
        return;

    init_vlc(&rl->vlc, 9, rl->n + 1,
             &rl->table_vlc[0][1], 4, 2,
             &rl->table_vlc[0][0], 4, 2, use_static);


    for(q=0; q<32; q++){
        /* H.263-style dequant: level*2*q + ((q-1)|1); q==0 is the
           pass-through case used for codecs that dequantize elsewhere */
        int qmul= q*2;
        int qadd= (q-1)|1;

        if(q==0){
            qmul=1;
            qadd=0;
        }
        if(use_static)
            rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        else
            rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
        for(i=0; i<rl->vlc.table_size; i++){
            int code= rl->vlc.table[i][0];
            int len = rl->vlc.table[i][1];
            int level, run;

            if(len==0){ // illegal code
                run= 66;
                level= MAX_LEVEL;
            }else if(len<0){ //more bits needed
                run= 0;
                level= code;
            }else{
                if(code==rl->n){ //esc
                    run= 66;
                    level= 0;
                }else{
                    run=   rl->table_run  [code] + 1;
                    level= rl->table_level[code] * qmul + qadd;
                    if(code >= rl->last) run+=192; // flag "last coefficient" codes
                }
            }
            rl->rl_vlc[q][i].len= len;
            rl->rl_vlc[q][i].level= level;
            rl->rl_vlc[q][i].run= run;
        }
    }
}
795
/* Replicates the border pixels of a width x height image into an edge of
   w pixels on every side (needed for motion estimation/compensation that
   reaches over the picture boundary). buf points at the top-left pixel;
   lines are wrap bytes apart and the w-pixel frame around the image must
   be writable. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row = buf;
    uint8_t *bottom = buf + (height - 1) * wrap;
    int k;

    /* replicate the first/last line into the w lines above/below */
    for (k = 1; k <= w; k++) {
        memcpy(buf    - k * wrap, buf,    width);
        memcpy(bottom + k * wrap, bottom, width);
    }

    /* extend every line sideways with its first/last pixel */
    for (k = 0; k < height; k++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four w x w corner blocks from the corner pixels */
    for (k = 1; k <= w; k++) {
        memset(buf    - k * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - k * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + k * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + k * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
824
825 int ff_find_unused_picture(MpegEncContext *s, int shared){
826     int i;
827
828     if(shared){
829         for(i=0; i<MAX_PICTURE_COUNT; i++){
830             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
831         }
832     }else{
833         for(i=0; i<MAX_PICTURE_COUNT; i++){
834             if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
835         }
836         for(i=0; i<MAX_PICTURE_COUNT; i++){
837             if(s->picture[i].data[0]==NULL) return i;
838         }
839     }
840
841     av_log(s->avctx, AV_LOG_FATAL, "Internal error, picture buffer overflow\n");
842     /* We could return -1, but the codec would crash trying to draw into a
843      * non-existing frame anyway. This is safer than waiting for a random crash.
844      * Also the return of this is never useful, an encoder must only allocate
845      * as much as allowed in the specification. This has no relationship to how
846      * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
847      * enough for such valid streams).
848      * Plus, a decoder has to check stream validity and remove frames if too
849      * many reference frames are around. Waiting for "OOM" is not correct at
850      * all. Similarly, missing reference frames have to be replaced by
851      * interpolated/MC frames, anything else is a bug in the codec ...
852      */
853     abort();
854     return -1;
855 }
856
857 static void update_noise_reduction(MpegEncContext *s){
858     int intra, i;
859
860     for(intra=0; intra<2; intra++){
861         if(s->dct_count[intra] > (1<<16)){
862             for(i=0; i<64; i++){
863                 s->dct_error_sum[intra][i] >>=1;
864             }
865             s->dct_count[intra] >>= 1;
866         }
867
868         for(i=0; i<64; i++){
869             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
870         }
871     }
872 }
873
/**
 * Generic function for encode/decode called after coding/decoding the header
 * and before a frame is coded/decoded.
 * Releases old reference frames, (re)allocates the current picture when
 * decoding, selects the dequantizer functions and refreshes the noise
 * reduction tables.
 * @return 0 on success, -1 if allocating the current picture failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
      /* H.264 manages its own references (except SVQ3, which reuses this path) */
      if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture that is neither last nor next should not exist */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
      }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; // we already have an unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* reference flag: 0 for dropable frames, picture_structure for H.264
           field/frame references, 3 for any other non-B picture */
        pic->reference= 0;
        if (!s->dropable){
            if (s->codec_id == CODEC_ID_H264)
                pic->reference = s->picture_structure;
            else if (s->pict_type != B_TYPE)
                pic->reference = 3;
        }

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

    /* shift the reference pictures: current becomes next, next becomes last */
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* stream starts on a non-keyframe: fabricate a reference by allocating
       another picture and retrying */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL) && !s->dropable){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: double the line strides and, for the bottom field,
       start one line down, so the field is addressed like a frame */
    if(s->picture_structure!=PICT_FRAME && s->out_format != FMT_H264){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init is not called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1005
/* Generic function for encode/decode called after a frame has been
 * coded/decoded: pads the picture edges for motion prediction, updates
 * the per-type bookkeeping and releases non-reference frames (encoder). */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    /* restore the FPU state after possible MMX use in the DSP routines */
    emms_c();

    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1056
1057 /**
1058  * draws an line from (ex, ey) -> (sx, sy).
1059  * @param w width of the image
1060  * @param h height of the image
1061  * @param stride stride/linesize of the image
1062  * @param color color of the arrow
1063  */
1064 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
1065     int x, y, fr, f;
1066
1067     sx= av_clip(sx, 0, w-1);
1068     sy= av_clip(sy, 0, h-1);
1069     ex= av_clip(ex, 0, w-1);
1070     ey= av_clip(ey, 0, h-1);
1071
1072     buf[sy*stride + sx]+= color;
1073
1074     if(FFABS(ex - sx) > FFABS(ey - sy)){
1075         if(sx > ex){
1076             FFSWAP(int, sx, ex);
1077             FFSWAP(int, sy, ey);
1078         }
1079         buf+= sx + sy*stride;
1080         ex-= sx;
1081         f= ((ey-sy)<<16)/ex;
1082         for(x= 0; x <= ex; x++){
1083             y = (x*f)>>16;
1084             fr= (x*f)&0xFFFF;
1085             buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
1086             buf[(y+1)*stride + x]+= (color*         fr )>>16;
1087         }
1088     }else{
1089         if(sy > ey){
1090             FFSWAP(int, sx, ex);
1091             FFSWAP(int, sy, ey);
1092         }
1093         buf+= sx + sy*stride;
1094         ey-= sy;
1095         if(ey) f= ((ex-sx)<<16)/ey;
1096         else   f= 0;
1097         for(y= 0; y <= ey; y++){
1098             x = (y*f)>>16;
1099             fr= (y*f)&0xFFFF;
1100             buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
1101             buf[y*stride + x+1]+= (color*         fr )>>16;
1102         }
1103     }
1104 }
1105
/**
 * Draws an arrow from (ex, ey) -> (sx, sy): the shaft plus, when the
 * vector is long enough, two short head strokes anchored at (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    sx= av_clip(sx, -100, w+100);
    sy= av_clip(sy, -100, h+100);
    ex= av_clip(ex, -100, w+100);
    ey= av_clip(ey, -100, h+100);

    vx= ex - sx;
    vy= ey - sy;

    /* only draw a head when the vector is longer than 3 pixels */
    if(vx*vx + vy*vy > 3*3){
        /* rotate the direction by +/-45 degrees and scale to a short
           stroke to form the two sides of the arrow head */
        int rx=  vx + vy;
        int ry= -vx + vy;
        int length= ff_sqrt((rx*rx + ry*ry)<<8);

        //FIXME subpixel accuracy
        rx= ROUNDED_DIV(rx*3<<4, length);
        ry= ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1138
/**
 * Prints debugging info for the given picture.
 * Depending on s->avctx->debug / debug_mv this logs a per-macroblock
 * text map (skip count, qp, mb type) and/or draws motion vectors, qp
 * and mb-type overlays into a copy of the picture (pict->data is then
 * redirected to s->visualization_buffer).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock dump: one character cell per MB */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, capped at 9 to keep one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* visual overlays drawn into a private copy of the frame */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* redirect pict->data to a scratch copy so the reference frames
           themselves are not scribbled on */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                /* motion vector arrows: type 0 = P forward, 1 = B forward,
                   2 = B backward; skipped unless the matching flag is set */
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition: 4 for 8x8, 2 for 16x8/8x16,
                       1 for 16x16 */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                /* qp overlay: paint the chroma planes of the MB with a grey
                   level proportional to the quantizer */
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                /* mb-type overlay: colorize the chroma planes by type and
                   mark partition boundaries in luma */
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1399
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples into the parts of the block that fall outside the
 * source picture.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right; /* bounds of the valid region in the block */

    /* clamp the block position so at least one row/column overlaps the source */
    if(src_y>= h){
        src  += (h-1-src_y)*linesize;
        src_y = h-1;
    }else if(src_y<=-block_h){
        src  += (1-block_h-src_y)*linesize;
        src_y = 1-block_h;
    }
    if(src_x>= w){
        src  += w-1-src_x;
        src_x = w-1;
    }else if(src_x<=-block_w){
        src  += 1-block_w-src_x;
        src_x = 1-block_w;
    }

    top    = FFMAX(0, -src_y);
    left   = FFMAX(0, -src_x);
    bottom = FFMIN(block_h, h-src_y); /* exclusive */
    right  = FFMIN(block_w, w-src_x); /* exclusive */

    /* copy the part of the block that overlaps the source picture */
    for(y=top; y<bottom; y++)
        for(x=left; x<right; x++)
            buf[x + y*linesize]= src[x + y*linesize];

    /* replicate the first valid row upwards */
    for(y=0; y<top; y++)
        for(x=left; x<right; x++)
            buf[x + y*linesize]= buf[x + top*linesize];

    /* replicate the last valid row downwards */
    for(y=bottom; y<block_h; y++)
        for(x=left; x<right; x++)
            buf[x + y*linesize]= buf[x + (bottom-1)*linesize];

    /* replicate the leftmost/rightmost valid columns sideways;
       this also fills the four corners */
    for(y=0; y<block_h; y++){
        for(x=0; x<left; x++)
            buf[x + y*linesize]= buf[left + y*linesize];

        for(x=right; x<block_w; x++)
            buf[x + y*linesize]= buf[right - 1 + y*linesize];
    }
}
1470
1471 static inline int hpel_motion_lowres(MpegEncContext *s,
1472                                   uint8_t *dest, uint8_t *src,
1473                                   int field_based, int field_select,
1474                                   int src_x, int src_y,
1475                                   int width, int height, int stride,
1476                                   int h_edge_pos, int v_edge_pos,
1477                                   int w, int h, h264_chroma_mc_func *pix_op,
1478                                   int motion_x, int motion_y)
1479 {
1480     const int lowres= s->avctx->lowres;
1481     const int s_mask= (2<<lowres)-1;
1482     int emu=0;
1483     int sx, sy;
1484
1485     if(s->quarter_sample){
1486         motion_x/=2;
1487         motion_y/=2;
1488     }
1489
1490     sx= motion_x & s_mask;
1491     sy= motion_y & s_mask;
1492     src_x += motion_x >> (lowres+1);
1493     src_y += motion_y >> (lowres+1);
1494
1495     src += src_y * stride + src_x;
1496
1497     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
1498        || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
1499         ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
1500                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
1501         src= s->edge_emu_buffer;
1502         emu=1;
1503     }
1504
1505     sx <<= 2 - lowres;
1506     sy <<= 2 - lowres;
1507     if(field_select)
1508         src += s->linesize;
1509     pix_op[lowres](dest, src, stride, h, sx, sy);
1510     return emu;
1511 }
1512
/**
 * Apply one MPEG motion vector to the three components (low-resolution path).
 * @param s             codec context
 * @param dest_y        luma destination
 * @param dest_cb       Cb destination
 * @param dest_cr       Cr destination
 * @param field_based   1 if MC operates on single fields (halved height)
 * @param bottom_field  1 to write into the bottom field of the destination
 * @param field_select  which field of the reference to read from
 * @param ref_picture   array[3] of pointers to the Y/Cb/Cr reference planes
 * @param pix_op        h264-style chroma MC function table, indexed by lowres level
 * @param motion_x      horizontal motion vector, half-pel units (quarter-pel is halved below)
 * @param motion_y      vertical motion vector, same units
 * @param h             height to predict, in lowres-scaled luma lines
 */
static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;          /* block size shrinks with each lowres level */
    const int s_mask= (2<<lowres)-1;       /* mask of sub-pel fraction bits at this level */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the vector into integer position and sub-pel fraction */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        /* MPEG-1/2 style: chroma vector is half the luma vector */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the reference block overlaps the picture edge, build a padded copy */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* scale the sub-pel fraction up to the 1/8-pel range the MC functions expect */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    /* luma block is twice the chroma block size, hence the lowres-1 table entry */
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
1609
/**
 * Chroma motion compensation for a 4MV (four 8x8) macroblock, lowres path.
 * A single chroma vector is derived from the sum of the four luma vectors
 * with the H.263 rounding rule.
 * @param mx sum of the four luma horizontal motion vectors
 * @param my sum of the four luma vertical motion vectors
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;            /* sub-pel fraction mask at this lowres level */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* pad via the edge buffer if the block sticks out of the picture */
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr uses the same offset; reuse the edge emulation decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
1659
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        /* one vector for the whole macroblock */
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four luma vectors; mx/my accumulate them for the chroma MV */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture referencing the opposite parity of the current
               frame: the data is in current_picture, not the reference */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        /* two vectors, each covering the upper/lower half of the MB */
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual-prime: predictions from both field parities are averaged */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
1785
1786 /* put block[] to dest[] */
1787 static inline void put_dct(MpegEncContext *s,
1788                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1789 {
1790     s->dct_unquantize_intra(s, block, i, qscale);
1791     s->dsp.idct_put (dest, line_size, block);
1792 }
1793
1794 /* add block[] to dest[] */
1795 static inline void add_dct(MpegEncContext *s,
1796                            DCTELEM *block, int i, uint8_t *dest, int line_size)
1797 {
1798     if (s->block_last_index[i] >= 0) {
1799         s->dsp.idct_add (dest, line_size, block);
1800     }
1801 }
1802
1803 static inline void add_dequant_dct(MpegEncContext *s,
1804                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
1805 {
1806     if (s->block_last_index[i] >= 0) {
1807         s->dct_unquantize_inter(s, block, i, qscale);
1808
1809         s->dsp.idct_add (dest, line_size, block);
1810     }
1811 }
1812
1813 /**
1814  * cleans dc, ac, coded_block for the current non intra MB
1815  */
1816 void ff_clean_intra_table_entries(MpegEncContext *s)
1817 {
1818     int wrap = s->b8_stride;
1819     int xy = s->block_index[0];
1820
1821     s->dc_val[0][xy           ] =
1822     s->dc_val[0][xy + 1       ] =
1823     s->dc_val[0][xy     + wrap] =
1824     s->dc_val[0][xy + 1 + wrap] = 1024;
1825     /* ac pred */
1826     memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
1827     memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
1828     if (s->msmpeg4_version>=3) {
1829         s->coded_block[xy           ] =
1830         s->coded_block[xy + 1       ] =
1831         s->coded_block[xy     + wrap] =
1832         s->coded_block[xy + 1 + wrap] = 0;
1833     }
1834     /* chroma */
1835     wrap = s->mb_stride;
1836     xy = s->mb_x + s->mb_y * wrap;
1837     s->dc_val[1][xy] =
1838     s->dc_val[2][xy] = 1024;
1839     /* ac pred */
1840     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
1841     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
1842
1843     s->mbintra_table[xy]= 0;
1844 }
1845
1846 /* generic function called after a macroblock has been parsed by the
1847    decoder or after it has been encoded by the encoder.
1848
1849    Important variables used:
1850    s->mb_intra : true if intra macroblock
1851    s->mv_dir   : motion vector direction
1852    s->mv_type  : motion vector type
1853    s->mv       : motion vector
1854    s->interlaced_dct : true if interlaced dct used (mpeg2)
1855  */
/* Reconstruct one macroblock: update DC predictors, run motion compensation
   for inter MBs, then add (inter) or put (intra) the IDCT of the residual.
   lowres_flag selects the low-resolution code path; it is intended to be a
   compile-time constant here (av_always_inline, dispatched by MPV_decode_mb). */
static av_always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* reconstruct the pixels unless we are encoding and the reconstruction
       is not needed (no PSNR, non-reference frame, no RD decision) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride,
           offset by one line instead of block_size lines */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* B-frame destination is still referenced as a prediction source;
               reconstruct into a scratchpad and copy at the end */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    op_qpix= s->me.qpel_put;
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                    }
                    /* bidirectional: forward prediction is "put", the backward
                       pass then averages on top of it */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->me.qpel_avg;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* codecs whose dequantization was not done during parsing */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if (s->chroma_y_shift){
                        add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_linesize >>= 1;
                        dct_offset >>=1;
                        add_dequant_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else if (ENABLE_WMV2) {
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                    }else{
                        dct_offset >>=1;
                        dct_linesize >>=1;
                        put_dct(s, block[4], 4, dest_cb,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[5], 5, dest_cr,              dct_linesize, s->chroma_qscale);
                        put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale);
                        put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale);
                    }
                }
            }else{
                /* MPEG-1/2 intra blocks are already dequantized during parsing */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy the scratchpad reconstruction to the real destination */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
2096
2097 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
2098     if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
2099     else                  MPV_decode_mb_internal(s, block, 0);
2100 }
2101
/**
 * Pass a just-decoded horizontal band of the picture to the user's
 * draw_horiz_band callback.
 * @param y top row of the band (doubled for field pictures below)
 * @param h the normal height, this will be reduced automatically if needed for the last row
 */
void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
    if (s->avctx->draw_horiz_band) {
        AVFrame *src;
        int offset[4];

        /* in a field picture one coded line corresponds to two frame lines */
        if(s->picture_structure != PICT_FRAME){
            h <<= 1;
            y <<= 1;
            if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
        }

        h= FFMIN(h, s->avctx->height - y);

        /* B-frames (and low-delay / coded-order output) are shown directly;
           otherwise display the previous reference picture */
        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
            src= (AVFrame*)s->current_picture_ptr;
        else if(s->last_picture_ptr)
            src= (AVFrame*)s->last_picture_ptr;
        else
            return;

        if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
            offset[0]=
            offset[1]=
            offset[2]=
            offset[3]= 0;
        }else{
            offset[0]= y * s->linesize;
            offset[1]=
            offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
            offset[3]= 0;
        }

        emms_c();

        s->avctx->draw_horiz_band(s->avctx, src, offset,
                                  y, s->picture_structure, h);
    }
}
2144
2145 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
2146     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
2147     const int uvlinesize= s->current_picture.linesize[1];
2148     const int mb_size= 4 - s->avctx->lowres;
2149
2150     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
2151     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
2152     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
2153     s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
2154     s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2155     s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
2156     //block_index is not used by mpeg2, so it is not affected by chroma_format
2157
2158     s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
2159     s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2160     s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
2161
2162     if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
2163     {
2164         s->dest[0] += s->mb_y *   linesize << mb_size;
2165         s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2166         s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
2167     }
2168 }
2169
2170 void ff_mpeg_flush(AVCodecContext *avctx){
2171     int i;
2172     MpegEncContext *s = avctx->priv_data;
2173
2174     if(s==NULL || s->picture==NULL)
2175         return;
2176
2177     for(i=0; i<MAX_PICTURE_COUNT; i++){
2178        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
2179                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
2180         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
2181     }
2182     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
2183
2184     s->mb_x= s->mb_y= 0;
2185
2186     s->parse_context.state= -1;
2187     s->parse_context.frame_start_found= 0;
2188     s->parse_context.overread= 0;
2189     s->parse_context.overread_index= 0;
2190     s->parse_context.index= 0;
2191     s->parse_context.last_index= 0;
2192     s->bitstream_buffer_size=0;
2193     s->pp_time=0;
2194 }
2195
/**
 * MPEG-1 intra-block dequantization (C reference implementation).
 * The DC coefficient (block[0]) is scaled by the DC scale factor only;
 * AC coefficients are scaled by qscale and the intra matrix, and
 * "(level - 1) | 1" forces each result odd — the MPEG-1 oddification
 * rule for IDCT mismatch control (see the MPEG-1 spec).
 * The sign-split branches must stay separate: the rounding is applied
 * to the magnitude, not the signed value.
 */
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale)
{
    int i, level, nCoeffs;
    const uint16_t *quant_matrix;

    nCoeffs= s->block_last_index[n];

    /* blocks 0..3 are luma, the rest chroma — different DC scales */
    if (n < 4)
        block[0] = block[0] * s->y_dc_scale;
    else
        block[0] = block[0] * s->c_dc_scale;
    /* XXX: only mpeg1 */
    quant_matrix = s->intra_matrix;
    for(i=1;i<=nCoeffs;i++) {
        int j= s->intra_scantable.permutated[i];
        level = block[j];
        if (level) {
            if (level < 0) {
                level = -level;
                level = (int)(level * qscale * quant_matrix[j]) >> 3;
                level = (level - 1) | 1;
                level = -level;
            } else {
                level = (int)(level * qscale * quant_matrix[j]) >> 3;
                level = (level - 1) | 1;
            }
            block[j] = level;
        }
    }
}
2227
2228 static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
2229                                    DCTELEM *block, int n, int qscale)
2230 {
2231     int i, level, nCoeffs;
2232     const uint16_t *quant_matrix;
2233
2234     nCoeffs= s->block_last_index[n];
2235
2236     quant_matrix = s->inter_matrix;
2237     for(i=0; i<=nCoeffs; i++) {
2238         int j= s->intra_scantable.permutated[i];
2239         level = block[j];
2240         if (level) {
2241             if (level < 0) {
2242                 level = -level;
2243                 level = (((level << 1) + 1) * qscale *
2244                          ((int) (quant_matrix[j]))) >> 4;
2245                 level = (level - 1) | 1;
2246                 level = -level;
2247             } else {
2248                 level = (((level << 1) + 1) * qscale *
2249                          ((int) (quant_matrix[j]))) >> 4;
2250                 level = (level - 1) | 1;
2251             }
2252             block[j] = level;
2253         }
2254     }
2255 }
2256
2257 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
2258                                    DCTELEM *block, int n, int qscale)
2259 {
2260     int i, level, nCoeffs;
2261     const uint16_t *quant_matrix;
2262
2263     if(s->alternate_scan) nCoeffs= 63;
2264     else nCoeffs= s->block_last_index[n];
2265
2266     if (n < 4)
2267         block[0] = block[0] * s->y_dc_scale;
2268     else
2269         block[0] = block[0] * s->c_dc_scale;
2270     quant_matrix = s->intra_matrix;
2271     for(i=1;i<=nCoeffs;i++) {
2272         int j= s->intra_scantable.permutated[i];
2273         level = block[j];
2274         if (level) {
2275             if (level < 0) {
2276                 level = -level;
2277                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2278                 level = -level;
2279             } else {
2280                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2281             }
2282             block[j] = level;
2283         }
2284     }
2285 }
2286
2287 static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
2288                                    DCTELEM *block, int n, int qscale)
2289 {
2290     int i, level, nCoeffs;
2291     const uint16_t *quant_matrix;
2292     int sum=-1;
2293
2294     if(s->alternate_scan) nCoeffs= 63;
2295     else nCoeffs= s->block_last_index[n];
2296
2297     if (n < 4)
2298         block[0] = block[0] * s->y_dc_scale;
2299     else
2300         block[0] = block[0] * s->c_dc_scale;
2301     quant_matrix = s->intra_matrix;
2302     for(i=1;i<=nCoeffs;i++) {
2303         int j= s->intra_scantable.permutated[i];
2304         level = block[j];
2305         if (level) {
2306             if (level < 0) {
2307                 level = -level;
2308                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2309                 level = -level;
2310             } else {
2311                 level = (int)(level * qscale * quant_matrix[j]) >> 3;
2312             }
2313             block[j] = level;
2314             sum+=level;
2315         }
2316     }
2317     block[63]^=sum&1;
2318 }
2319
2320 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
2321                                    DCTELEM *block, int n, int qscale)
2322 {
2323     int i, level, nCoeffs;
2324     const uint16_t *quant_matrix;
2325     int sum=-1;
2326
2327     if(s->alternate_scan) nCoeffs= 63;
2328     else nCoeffs= s->block_last_index[n];
2329
2330     quant_matrix = s->inter_matrix;
2331     for(i=0; i<=nCoeffs; i++) {
2332         int j= s->intra_scantable.permutated[i];
2333         level = block[j];
2334         if (level) {
2335             if (level < 0) {
2336                 level = -level;
2337                 level = (((level << 1) + 1) * qscale *
2338                          ((int) (quant_matrix[j]))) >> 4;
2339                 level = -level;
2340             } else {
2341                 level = (((level << 1) + 1) * qscale *
2342                          ((int) (quant_matrix[j]))) >> 4;
2343             }
2344             block[j] = level;
2345             sum+=level;
2346         }
2347     }
2348     block[63]^=sum&1;
2349 }
2350
2351 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
2352                                   DCTELEM *block, int n, int qscale)
2353 {
2354     int i, level, qmul, qadd;
2355     int nCoeffs;
2356
2357     assert(s->block_last_index[n]>=0);
2358
2359     qmul = qscale << 1;
2360
2361     if (!s->h263_aic) {
2362         if (n < 4)
2363             block[0] = block[0] * s->y_dc_scale;
2364         else
2365             block[0] = block[0] * s->c_dc_scale;
2366         qadd = (qscale - 1) | 1;
2367     }else{
2368         qadd = 0;
2369     }
2370     if(s->ac_pred)
2371         nCoeffs=63;
2372     else
2373         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2374
2375     for(i=1; i<=nCoeffs; i++) {
2376         level = block[i];
2377         if (level) {
2378             if (level < 0) {
2379                 level = level * qmul - qadd;
2380             } else {
2381                 level = level * qmul + qadd;
2382             }
2383             block[i] = level;
2384         }
2385     }
2386 }
2387
2388 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
2389                                   DCTELEM *block, int n, int qscale)
2390 {
2391     int i, level, qmul, qadd;
2392     int nCoeffs;
2393
2394     assert(s->block_last_index[n]>=0);
2395
2396     qadd = (qscale - 1) | 1;
2397     qmul = qscale << 1;
2398
2399     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
2400
2401     for(i=0; i<=nCoeffs; i++) {
2402         level = block[i];
2403         if (level) {
2404             if (level < 0) {
2405                 level = level * qmul - qadd;
2406             } else {
2407                 level = level * qmul + qadd;
2408             }
2409             block[i] = level;
2410         }
2411     }
2412 }
2413
2414 /**
2415  * set qscale and update qscale dependent variables.
2416  */
2417 void ff_set_qscale(MpegEncContext * s, int qscale)
2418 {
2419     if (qscale < 1)
2420         qscale = 1;
2421     else if (qscale > 31)
2422         qscale = 31;
2423
2424     s->qscale = qscale;
2425     s->chroma_qscale= s->chroma_qscale_table[qscale];
2426
2427     s->y_dc_scale= s->y_dc_scale_table[ qscale ];
2428     s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ];
2429 }