libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file libavcodec/4xm.c
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "bitstream.h"
  31 #include "bytestream.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
  36 #define BLOCK_TYPE_VLC_BITS 5
  37 #define ACDC_VLC_BITS 9
  38
  39 #define CFRAME_BUFFER_COUNT 100
  40
  41 static const uint8_t block_type_tab[2][4][8][2]={
  42  {
  43   {   //{8,4,2}x{8,4,2}
  44     { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
  45   },{ //{8,4}x1
  46     { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
  47   },{ //1x{8,4}
  48     { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
  49   },{ //1x2, 2x1
  50     { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
  51   }
  52  },{
  53   {  //{8,4,2}x{8,4,2}
  54     { 1,2}, { 4,3}, { 5,3}, {0,2}, {6,3}, {7,3}, {0,0}
  55   },{//{8,4}x1
  56     { 1,2}, { 0,0}, { 2,2}, {0,2}, {6,3}, {7,3}, {0,0}
  57   },{//1x{8,4}
  58     { 1,2}, { 2,2}, { 0,0}, {0,2}, {6,3}, {7,3}, {0,0}
  59   },{//1x2, 2x1
  60     { 1,2}, { 0,0}, { 0,0}, {0,2}, {2,2}, {6,3}, {7,3}
  61   }
  62  }
  63 };
  64
  65 static const uint8_t size2index[4][4]={
  66   {-1, 3, 1, 1},
  67   { 3, 0, 0, 0},
  68   { 2, 0, 0, 0},
  69   { 2, 0, 0, 0},
  70 };
  71
  72 static const int8_t mv[256][2]={
  73 {  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
  74 {  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
  75 {  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
  76 {  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
  77 {  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
  78 {  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
  79 {  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
  80 { -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
  81 {  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
  82 { -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
  83 { -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
  84 { -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
  85 {  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
  86 {  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
  87 {  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
  88 { -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
  89 { -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
  90 { 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
  91 { 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
  92 {  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
  93 {-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
  94 { -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
  95 { 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
  96 {  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
  97 { -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
  98 {  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
  99 { 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
 100 { 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
 101 { 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
 102 {-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
 103 {-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
 104 {-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
 105 };
 106
 107 // this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
 108 static const uint8_t dequant_table[64]={
 109  16, 15, 13, 19, 24, 31, 28, 17,
 110  17, 23, 25, 31, 36, 63, 45, 21,
 111  18, 24, 27, 37, 52, 59, 49, 20,
 112  16, 28, 34, 40, 60, 80, 51, 20,
 113  18, 31, 48, 66, 68, 86, 56, 21,
 114  19, 38, 56, 59, 64, 64, 48, 20,
 115  27, 48, 55, 55, 56, 51, 35, 15,
 116  20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
 119 static VLC block_type_vlc[2][4];
 120
 121
 122 typedef struct CFrameBuffer{
 123     unsigned int allocated_size;
 124     unsigned int size;
 125     int id;
 126     uint8_t *data;
 127 }CFrameBuffer;
 128
 129 typedef struct FourXContext{
 130     AVCodecContext *avctx;
 131     DSPContext dsp;
 132     AVFrame current_picture, last_picture;
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;
 135     const uint8_t *bytestream;
 136     const uint16_t *wordstream;
 137     int mv[256];
 138     VLC pre_vlc;
 139     int last_dc;
 140     DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
 141     uint8_t *bitstream_buffer;
 142     unsigned int bitstream_buffer_size;
 143     int version;
 144     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 145 } FourXContext;
 146
 147
 148 #define FIX_1_082392200  70936
 149 #define FIX_1_414213562  92682
 150 #define FIX_1_847759065 121095
 151 #define FIX_2_613125930 171254
 152
 153 #define MULTIPLY(var,const)  (((var)*(const)) >> 16)
 154
 155 static void idct(DCTELEM block[64]){
 156     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 157     int tmp10, tmp11, tmp12, tmp13;
 158     int z5, z10, z11, z12, z13;
 159     int i;
 160     int temp[64];
 161
 162     for(i=0; i<8; i++){
 163         tmp10 = block[8*0 + i] + block[8*4 + i];
 164         tmp11 = block[8*0 + i] - block[8*4 + i];
 165
 166         tmp13 =          block[8*2 + i] + block[8*6 + i];
 167         tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;
 168
 169         tmp0 = tmp10 + tmp13;
 170         tmp3 = tmp10 - tmp13;
 171         tmp1 = tmp11 + tmp12;
 172         tmp2 = tmp11 - tmp12;
 173
 174         z13 = block[8*5 + i] + block[8*3 + i];
 175         z10 = block[8*5 + i] - block[8*3 + i];
 176         z11 = block[8*1 + i] + block[8*7 + i];
 177         z12 = block[8*1 + i] - block[8*7 + i];
 178
 179         tmp7  =          z11 + z13;
 180         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 181
 182         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 183         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 184         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 185
 186         tmp6 = tmp12 - tmp7;
 187         tmp5 = tmp11 - tmp6;
 188         tmp4 = tmp10 + tmp5;
 189
 190         temp[8*0 + i] = tmp0 + tmp7;
 191         temp[8*7 + i] = tmp0 - tmp7;
 192         temp[8*1 + i] = tmp1 + tmp6;
 193         temp[8*6 + i] = tmp1 - tmp6;
 194         temp[8*2 + i] = tmp2 + tmp5;
 195         temp[8*5 + i] = tmp2 - tmp5;
 196         temp[8*4 + i] = tmp3 + tmp4;
 197         temp[8*3 + i] = tmp3 - tmp4;
 198     }
 199
 200     for(i=0; i<8*8; i+=8){
 201         tmp10 = temp[0 + i] + temp[4 + i];
 202         tmp11 = temp[0 + i] - temp[4 + i];
 203
 204         tmp13 = temp[2 + i] + temp[6 + i];
 205         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 206
 207         tmp0 = tmp10 + tmp13;
 208         tmp3 = tmp10 - tmp13;
 209         tmp1 = tmp11 + tmp12;
 210         tmp2 = tmp11 - tmp12;
 211
 212         z13 = temp[5 + i] + temp[3 + i];
 213         z10 = temp[5 + i] - temp[3 + i];
 214         z11 = temp[1 + i] + temp[7 + i];
 215         z12 = temp[1 + i] - temp[7 + i];
 216
 217         tmp7 = z11 + z13;
 218         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 219
 220         z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
 221         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 222         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 223
 224         tmp6 = tmp12 - tmp7;
 225         tmp5 = tmp11 - tmp6;
 226         tmp4 = tmp10 + tmp5;
 227
 228         block[0 + i] = (tmp0 + tmp7)>>6;
 229         block[7 + i] = (tmp0 - tmp7)>>6;
 230         block[1 + i] = (tmp1 + tmp6)>>6;
 231         block[6 + i] = (tmp1 - tmp6)>>6;
 232         block[2 + i] = (tmp2 + tmp5)>>6;
 233         block[5 + i] = (tmp2 - tmp5)>>6;
 234         block[4 + i] = (tmp3 + tmp4)>>6;
 235         block[3 + i] = (tmp3 - tmp4)>>6;
 236     }
 237 }
 238
 239 static av_cold void init_vlcs(FourXContext *f){
 240     int i;
 241
 242     for(i=0; i<8; i++){
 243         init_vlc(&block_type_vlc[0][i], BLOCK_TYPE_VLC_BITS, 7,
 244                  &block_type_tab[0][i][0][1], 2, 1,
 245                  &block_type_tab[0][i][0][0], 2, 1, INIT_VLC_USE_STATIC);
 246     }
 247 }
 248
 249 static void init_mv(FourXContext *f){
 250     int i;
 251
 252     for(i=0; i<256; i++){
 253         if(f->version>1)
 254             f->mv[i] = mv[i][0]   + mv[i][1]  *f->current_picture.linesize[0]/2;
 255         else
 256             f->mv[i] = (i&15) - 8 + ((i>>4)-8)*f->current_picture.linesize[0]/2;
 257     }
 258 }
 259
 260 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
 261    int i;
 262    dc*= 0x10001;
 263
 264    switch(log2w){
 265    case 0:
 266         for(i=0; i<h; i++){
 267             dst[0] = scale*src[0] + dc;
 268             if(scale) src += stride;
 269             dst += stride;
 270         }
 271         break;
 272     case 1:
 273         for(i=0; i<h; i++){
 274             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 275             if(scale) src += stride;
 276             dst += stride;
 277         }
 278         break;
 279     case 2:
 280         for(i=0; i<h; i++){
 281             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 282             ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
 283             if(scale) src += stride;
 284             dst += stride;
 285         }
 286         break;
 287     case 3:
 288         for(i=0; i<h; i++){
 289             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 290             ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
 291             ((uint32_t*)dst)[2] = scale*((uint32_t*)src)[2] + dc;
 292             ((uint32_t*)dst)[3] = scale*((uint32_t*)src)[3] + dc;
 293             if(scale) src += stride;
 294             dst += stride;
 295         }
 296         break;
 297     default: assert(0);
 298     }
 299 }
 300
 301 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
 302     const int index= size2index[log2h][log2w];
 303     const int h= 1<<log2h;
 304     int code= get_vlc2(&f->gb, block_type_vlc[1-(f->version>1)][index].table, BLOCK_TYPE_VLC_BITS, 1);
 305     uint16_t *start= (uint16_t*)f->last_picture.data[0];
 306     uint16_t *end= start + stride*(f->avctx->height-h+1) - (1<<log2w);
 307
 308     assert(code>=0 && code<=6);
 309
 310     if(code == 0){
 311         src += f->mv[ *f->bytestream++ ];
 312         if(start > src || src > end){
 313             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 314             return;
 315         }
 316         mcdc(dst, src, log2w, h, stride, 1, 0);
 317     }else if(code == 1){
 318         log2h--;
 319         decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
 320         decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
 321     }else if(code == 2){
 322         log2w--;
 323         decode_p_block(f, dst             , src             , log2w, log2h, stride);
 324         decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
 325     }else if(code == 3 && f->version<2){
 326         mcdc(dst, src, log2w, h, stride, 1, 0);
 327     }else if(code == 4){
 328         src += f->mv[ *f->bytestream++ ];
 329         if(start > src || src > end){
 330             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 331             return;
 332         }
 333         mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++));
 334     }else if(code == 5){
 335         mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++));
 336     }else if(code == 6){
 337         if(log2w){
 338             dst[0] = le2me_16(*f->wordstream++);
 339             dst[1] = le2me_16(*f->wordstream++);
 340         }else{
 341             dst[0     ] = le2me_16(*f->wordstream++);
 342             dst[stride] = le2me_16(*f->wordstream++);
 343         }
 344     }
 345 }
 346
 347 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
 348     int x, y;
 349     const int width= f->avctx->width;
 350     const int height= f->avctx->height;
 351     uint16_t *src= (uint16_t*)f->last_picture.data[0];
 352     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 353     const int stride= f->current_picture.linesize[0]>>1;
 354     unsigned int bitstream_size, bytestream_size, wordstream_size, extra;
 355
 356     if(f->version>1){
 357         extra=20;
 358         bitstream_size= AV_RL32(buf+8);
 359         wordstream_size= AV_RL32(buf+12);
 360         bytestream_size= AV_RL32(buf+16);
 361     }else{
 362         extra=0;
 363         bitstream_size = AV_RL16(buf-4);
 364         wordstream_size= AV_RL16(buf-2);
 365         bytestream_size= FFMAX(length - bitstream_size - wordstream_size, 0);
 366     }
 367
 368     if(bitstream_size+ bytestream_size+ wordstream_size + extra != length
 369        || bitstream_size  > (1<<26)
 370        || bytestream_size > (1<<26)
 371        || wordstream_size > (1<<26)
 372        ){
 373         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
 374         bitstream_size+ bytestream_size+ wordstream_size - length);
 375         return -1;
 376     }
 377
 378     f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 379     f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (const uint32_t*)(buf + extra), bitstream_size/4);
 380     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 381
 382     f->wordstream= (const uint16_t*)(buf + extra + bitstream_size);
 383     f->bytestream= buf + extra + bitstream_size + wordstream_size;
 384
 385     init_mv(f);
 386
 387     for(y=0; y<height; y+=8){
 388         for(x=0; x<width; x+=8){
 389             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 390         }
 391         src += 8*stride;
 392         dst += 8*stride;
 393     }
 394
 395     if(   bitstream_size != (get_bits_count(&f->gb)+31)/32*4
 396        || (((const char*)f->wordstream - (const char*)buf + 2)&~2) != extra + bitstream_size + wordstream_size
 397        || (((const char*)f->bytestream - (const char*)buf + 3)&~3) != extra + bitstream_size + wordstream_size + bytestream_size)
 398         av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
 399             bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
 400             -(((const char*)f->bytestream - (const char*)buf + 3)&~3) + (extra + bitstream_size + wordstream_size + bytestream_size),
 401             -(((const char*)f->wordstream - (const char*)buf + 2)&~2) + (extra + bitstream_size + wordstream_size)
 402         );
 403
 404     return 0;
 405 }
 406
 407 /**
 408  * decode block and dequantize.
 409  * Note this is almost identical to MJPEG.
 410  */
 411 static int decode_i_block(FourXContext *f, DCTELEM *block){
 412     int code, i, j, level, val;
 413
 414     /* DC coef */
 415     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 416     if (val>>4){
 417         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 418     }
 419
 420     if(val)
 421         val = get_xbits(&f->gb, val);
 422
 423     val = val * dequant_table[0] + f->last_dc;
 424     f->last_dc =
 425     block[0] = val;
 426     /* AC coefs */
 427     i = 1;
 428     for(;;) {
 429         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 430
 431         /* EOB */
 432         if (code == 0)
 433             break;
 434         if (code == 0xf0) {
 435             i += 16;
 436         } else {
 437             level = get_xbits(&f->gb, code & 0xf);
 438             i += code >> 4;
 439             if (i >= 64) {
 440                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 441                 return 0;
 442             }
 443
 444             j= ff_zigzag_direct[i];
 445             block[j] = level * dequant_table[j];
 446             i++;
 447             if (i >= 64)
 448                 break;
 449         }
 450     }
 451
 452     return 0;
 453 }
 454
 455 static inline void idct_put(FourXContext *f, int x, int y){
 456     DCTELEM (*block)[64]= f->block;
 457     int stride= f->current_picture.linesize[0]>>1;
 458     int i;
 459     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 460
 461     for(i=0; i<4; i++){
 462         block[i][0] += 0x80*8*8;
 463         idct(block[i]);
 464     }
 465
 466     if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
 467         for(i=4; i<6; i++) idct(block[i]);
 468     }
 469
 470 /* Note transform is:
 471 y= ( 1b + 4g + 2r)/14
 472 cb=( 3b - 2g - 1r)/14
 473 cr=(-1b - 4g + 5r)/14
 474 */
 475     for(y=0; y<8; y++){
 476         for(x=0; x<8; x++){
 477             DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
 478             int cb= block[4][x + 8*y];
 479             int cr= block[5][x + 8*y];
 480             int cg= (cb + cr)>>1;
 481             int y;
 482
 483             cb+=cb;
 484
 485             y = temp[0];
 486             dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 487             y = temp[1];
 488             dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 489             y = temp[8];
 490             dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 491             y = temp[9];
 492             dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 493             dst += 2;
 494         }
 495         dst += 2*stride - 2*8;
 496     }
 497 }
 498
 499 static int decode_i_mb(FourXContext *f){
 500     int i;
 501
 502     f->dsp.clear_blocks(f->block[0]);
 503
 504     for(i=0; i<6; i++){
 505         if(decode_i_block(f, f->block[i]) < 0)
 506             return -1;
 507     }
 508
 509     return 0;
 510 }
 511
 512 static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf){
 513     int frequency[512];
 514     uint8_t flag[512];
 515     int up[512];
 516     uint8_t len_tab[257];
 517     int bits_tab[257];
 518     int start, end;
 519     const uint8_t *ptr= buf;
 520     int j;
 521
 522     memset(frequency, 0, sizeof(frequency));
 523     memset(up, -1, sizeof(up));
 524
 525     start= *ptr++;
 526     end= *ptr++;
 527     for(;;){
 528         int i;
 529
 530         for(i=start; i<=end; i++){
 531             frequency[i]= *ptr++;
 532         }
 533         start= *ptr++;
 534         if(start==0) break;
 535
 536         end= *ptr++;
 537     }
 538     frequency[256]=1;
 539
 540     while((ptr - buf)&3) ptr++; // 4byte align
 541
 542     for(j=257; j<512; j++){
 543         int min_freq[2]= {256*256, 256*256};
 544         int smallest[2]= {0, 0};
 545         int i;
 546         for(i=0; i<j; i++){
 547             if(frequency[i] == 0) continue;
 548             if(frequency[i] < min_freq[1]){
 549                 if(frequency[i] < min_freq[0]){
 550                     min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
 551                     min_freq[0]= frequency[i];smallest[0]= i;
 552                 }else{
 553                     min_freq[1]= frequency[i];smallest[1]= i;
 554                 }
 555             }
 556         }
 557         if(min_freq[1] == 256*256) break;
 558
 559         frequency[j]= min_freq[0] + min_freq[1];
 560         flag[ smallest[0] ]= 0;
 561         flag[ smallest[1] ]= 1;
 562         up[ smallest[0] ]=
 563         up[ smallest[1] ]= j;
 564         frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
 565     }
 566
 567     for(j=0; j<257; j++){
 568         int node;
 569         int len=0;
 570         int bits=0;
 571
 572         for(node= j; up[node] != -1; node= up[node]){
 573             bits += flag[node]<<len;
 574             len++;
 575             if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
 576         }
 577
 578         bits_tab[j]= bits;
 579         len_tab[j]= len;
 580     }
 581
 582     init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
 583              len_tab , 1, 1,
 584              bits_tab, 4, 4, 0);
 585
 586     return ptr;
 587 }
 588
 589 static int mix(int c0, int c1){
 590     int blue = 2*(c0&0x001F) + (c1&0x001F);
 591     int green= (2*(c0&0x03E0) + (c1&0x03E0))>>5;
 592     int red  = 2*(c0>>10) + (c1>>10);
 593     return red/3*1024 + green/3*32 + blue/3;
 594 }
 595
 596 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length){
 597     int x, y, x2, y2;
 598     const int width= f->avctx->width;
 599     const int height= f->avctx->height;
 600     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 601     const int stride= f->current_picture.linesize[0]>>1;
 602
 603     for(y=0; y<height; y+=16){
 604         for(x=0; x<width; x+=16){
 605             unsigned int color[4], bits;
 606             memset(color, 0, sizeof(color));
 607 //warning following is purely guessed ...
 608             color[0]= bytestream_get_le16(&buf);
 609             color[1]= bytestream_get_le16(&buf);
 610
 611             if(color[0]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 612             if(color[1]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 613
 614             color[2]= mix(color[0], color[1]);
 615             color[3]= mix(color[1], color[0]);
 616
 617             bits= bytestream_get_le32(&buf);
 618             for(y2=0; y2<16; y2++){
 619                 for(x2=0; x2<16; x2++){
 620                     int index= 2*(x2>>2) + 8*(y2>>2);
 621                     dst[y2*stride+x2]= color[(bits>>index)&3];
 622                 }
 623             }
 624             dst+=16;
 625         }
 626         dst += 16*stride - width;
 627     }
 628
 629     return 0;
 630 }
 631
 632 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length){
 633     int x, y;
 634     const int width= f->avctx->width;
 635     const int height= f->avctx->height;
 636     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 637     const int stride= f->current_picture.linesize[0]>>1;
 638     const unsigned int bitstream_size= AV_RL32(buf);
 639     const int token_count av_unused = AV_RL32(buf + bitstream_size + 8);
 640     unsigned int prestream_size= 4*AV_RL32(buf + bitstream_size + 4);
 641     const uint8_t *prestream= buf + bitstream_size + 12;
 642
 643     if(prestream_size + bitstream_size + 12 != length
 644        || bitstream_size > (1<<26)
 645        || prestream_size > (1<<26)){
 646         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
 647         return -1;
 648     }
 649
 650     prestream= read_huffman_tables(f, prestream);
 651
 652     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
 653
 654     prestream_size= length + buf - prestream;
 655
 656     f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 657     f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (const uint32_t*)prestream, prestream_size/4);
 658     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
 659
 660     f->last_dc= 0*128*8*8;
 661
 662     for(y=0; y<height; y+=16){
 663         for(x=0; x<width; x+=16){
 664             if(decode_i_mb(f) < 0)
 665                 return -1;
 666
 667             idct_put(f, x, y);
 668         }
 669         dst += 16*stride;
 670     }
 671
 672     if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 673         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 674
 675     return 0;
 676 }
 677
 678 static int decode_frame(AVCodecContext *avctx,
 679                         void *data, int *data_size,
 680                         AVPacket *avpkt)
 681 {
 682     const uint8_t *buf = avpkt->data;
 683     int buf_size = avpkt->size;
 684     FourXContext * const f = avctx->priv_data;
 685     AVFrame *picture = data;
 686     AVFrame *p, temp;
 687     int i, frame_4cc, frame_size;
 688
 689     frame_4cc= AV_RL32(buf);
 690     if(buf_size != AV_RL32(buf+4)+8 || buf_size < 20){
 691         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, AV_RL32(buf+4));
 692     }
 693
 694     if(frame_4cc == AV_RL32("cfrm")){
 695         int free_index=-1;
 696         const int data_size= buf_size - 20;
 697         const int id= AV_RL32(buf+12);
 698         const int whole_size= AV_RL32(buf+16);
 699         CFrameBuffer *cfrm;
 700
 701         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 702             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 703                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
 704         }
 705
 706         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 707             if(f->cfrm[i].id   == id) break;
 708             if(f->cfrm[i].size == 0 ) free_index= i;
 709         }
 710
 711         if(i>=CFRAME_BUFFER_COUNT){
 712             i= free_index;
 713             f->cfrm[i].id= id;
 714         }
 715         cfrm= &f->cfrm[i];
 716
 717         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 718         if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
 719             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 720             return -1;
 721         }
 722
 723         memcpy(cfrm->data + cfrm->size, buf+20, data_size);
 724         cfrm->size += data_size;
 725
 726         if(cfrm->size >= whole_size){
 727             buf= cfrm->data;
 728             frame_size= cfrm->size;
 729
 730             if(id != avctx->frame_number){
 731                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
 732             }
 733
 734             cfrm->size= cfrm->id= 0;
 735             frame_4cc= AV_RL32("pfrm");
 736         }else
 737             return buf_size;
 738     }else{
 739         buf= buf + 12;
 740         frame_size= buf_size - 12;
 741     }
 742
 743     temp= f->current_picture;
 744     f->current_picture= f->last_picture;
 745     f->last_picture= temp;
 746
 747     p= &f->current_picture;
 748     avctx->coded_frame= p;
 749
 750     avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
 751
 752     if(p->data[0])
 753         avctx->release_buffer(avctx, p);
 754
 755     p->reference= 1;
 756     if(avctx->get_buffer(avctx, p) < 0){
 757         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 758         return -1;
 759     }
 760
 761     if(frame_4cc == AV_RL32("ifr2")){
 762         p->pict_type= FF_I_TYPE;
 763         if(decode_i2_frame(f, buf-4, frame_size) < 0)
 764             return -1;
 765     }else if(frame_4cc == AV_RL32("ifrm")){
 766         p->pict_type= FF_I_TYPE;
 767         if(decode_i_frame(f, buf, frame_size) < 0)
 768             return -1;
 769     }else if(frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")){
 770         p->pict_type= FF_P_TYPE;
 771         if(decode_p_frame(f, buf, frame_size) < 0)
 772             return -1;
 773     }else if(frame_4cc == AV_RL32("snd_")){
 774         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
 775     }else{
 776         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
 777     }
 778
 779     p->key_frame= p->pict_type == FF_I_TYPE;
 780
 781     *picture= *p;
 782     *data_size = sizeof(AVPicture);
 783
 784     emms_c();
 785
 786     return buf_size;
 787 }
 788
 789
 790 static av_cold void common_init(AVCodecContext *avctx){
 791     FourXContext * const f = avctx->priv_data;
 792
 793     dsputil_init(&f->dsp, avctx);
 794
 795     f->avctx= avctx;
 796 }
 797
 798 static av_cold int decode_init(AVCodecContext *avctx){
 799     FourXContext * const f = avctx->priv_data;
 800
 801     if(avctx->extradata_size != 4 || !avctx->extradata) {
 802         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 803         return 1;
 804     }
 805
 806     f->version= AV_RL32(avctx->extradata)>>16;
 807     common_init(avctx);
 808     init_vlcs(f);
 809
 810     if(f->version>2) avctx->pix_fmt= PIX_FMT_RGB565;
 811     else             avctx->pix_fmt= PIX_FMT_RGB555;
 812
 813     return 0;
 814 }
 815
 816
 817 static av_cold int decode_end(AVCodecContext *avctx){
 818     FourXContext * const f = avctx->priv_data;
 819     int i;
 820
 821     av_freep(&f->bitstream_buffer);
 822     f->bitstream_buffer_size=0;
 823     for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 824         av_freep(&f->cfrm[i].data);
 825         f->cfrm[i].allocated_size= 0;
 826     }
 827     free_vlc(&f->pre_vlc);
 828
 829     return 0;
 830 }
 831
 832 AVCodec fourxm_decoder = {
 833     "4xm",
 834     CODEC_TYPE_VIDEO,
 835     CODEC_ID_4XM,
 836     sizeof(FourXContext),
 837     decode_init,
 838     NULL,
 839     decode_end,
 840     decode_frame,
 841     /*CODEC_CAP_DR1,*/
 842     .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
 843 };
 844