libavcodec/4xm.c

   1 /*
   2  * 4XM codec
   3  * Copyright (c) 2003 Michael Niedermayer
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file libavcodec/4xm.c
  24  * 4XM codec.
  25  */
  26
  27 #include "libavutil/intreadwrite.h"
  28 #include "avcodec.h"
  29 #include "dsputil.h"
  30 #include "get_bits.h"
  31 #include "bytestream.h"
  32
  33 //#undef NDEBUG
  34 //#include <assert.h>
  35
  36 #define BLOCK_TYPE_VLC_BITS 5
  37 #define ACDC_VLC_BITS 9
  38
  39 #define CFRAME_BUFFER_COUNT 100
  40
  41 static const uint8_t block_type_tab[2][4][8][2]={
  42  {
  43   {   //{8,4,2}x{8,4,2}
  44     { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
  45   },{ //{8,4}x1
  46     { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
  47   },{ //1x{8,4}
  48     { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
  49   },{ //1x2, 2x1
  50     { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
  51   }
  52  },{
  53   {  //{8,4,2}x{8,4,2}
  54     { 1,2}, { 4,3}, { 5,3}, {0,2}, {6,3}, {7,3}, {0,0}
  55   },{//{8,4}x1
  56     { 1,2}, { 0,0}, { 2,2}, {0,2}, {6,3}, {7,3}, {0,0}
  57   },{//1x{8,4}
  58     { 1,2}, { 2,2}, { 0,0}, {0,2}, {6,3}, {7,3}, {0,0}
  59   },{//1x2, 2x1
  60     { 1,2}, { 0,0}, { 0,0}, {0,2}, {2,2}, {6,3}, {7,3}
  61   }
  62  }
  63 };
  64
  65 static const uint8_t size2index[4][4]={
  66   {-1, 3, 1, 1},
  67   { 3, 0, 0, 0},
  68   { 2, 0, 0, 0},
  69   { 2, 0, 0, 0},
  70 };
  71
  72 static const int8_t mv[256][2]={
  73 {  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
  74 {  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
  75 {  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
  76 {  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
  77 {  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
  78 {  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
  79 {  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
  80 { -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
  81 {  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
  82 { -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
  83 { -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
  84 { -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
  85 {  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
  86 {  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
  87 {  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
  88 { -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
  89 { -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
  90 { 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
  91 { 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
  92 {  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
  93 {-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
  94 { -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
  95 { 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
  96 {  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
  97 { -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
  98 {  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
  99 { 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
 100 { 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
 101 { 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
 102 {-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
 103 {-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
 104 {-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
 105 };
 106
 107 // this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
 108 static const uint8_t dequant_table[64]={
 109  16, 15, 13, 19, 24, 31, 28, 17,
 110  17, 23, 25, 31, 36, 63, 45, 21,
 111  18, 24, 27, 37, 52, 59, 49, 20,
 112  16, 28, 34, 40, 60, 80, 51, 20,
 113  18, 31, 48, 66, 68, 86, 56, 21,
 114  19, 38, 56, 59, 64, 64, 48, 20,
 115  27, 48, 55, 55, 56, 51, 35, 15,
 116  20, 35, 34, 32, 31, 22, 15,  8,
 117 };
 118
 119 static VLC block_type_vlc[2][4];
 120
 121
 122 typedef struct CFrameBuffer{
 123     unsigned int allocated_size;
 124     unsigned int size;
 125     int id;
 126     uint8_t *data;
 127 }CFrameBuffer;
 128
 129 typedef struct FourXContext{
 130     AVCodecContext *avctx;
 131     DSPContext dsp;
 132     AVFrame current_picture, last_picture;
 133     GetBitContext pre_gb;          ///< ac/dc prefix
 134     GetBitContext gb;
 135     const uint8_t *bytestream;
 136     const uint16_t *wordstream;
 137     int mv[256];
 138     VLC pre_vlc;
 139     int last_dc;
 140     DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
 141     void *bitstream_buffer;
 142     unsigned int bitstream_buffer_size;
 143     int version;
 144     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 145 } FourXContext;
 146
 147
 148 #define FIX_1_082392200  70936
 149 #define FIX_1_414213562  92682
 150 #define FIX_1_847759065 121095
 151 #define FIX_2_613125930 171254
 152
 153 #define MULTIPLY(var,const)  (((var)*(const)) >> 16)
 154
 155 static void idct(DCTELEM block[64]){
 156     int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 157     int tmp10, tmp11, tmp12, tmp13;
 158     int z5, z10, z11, z12, z13;
 159     int i;
 160     int temp[64];
 161
 162     for(i=0; i<8; i++){
 163         tmp10 = block[8*0 + i] + block[8*4 + i];
 164         tmp11 = block[8*0 + i] - block[8*4 + i];
 165
 166         tmp13 =          block[8*2 + i] + block[8*6 + i];
 167         tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;
 168
 169         tmp0 = tmp10 + tmp13;
 170         tmp3 = tmp10 - tmp13;
 171         tmp1 = tmp11 + tmp12;
 172         tmp2 = tmp11 - tmp12;
 173
 174         z13 = block[8*5 + i] + block[8*3 + i];
 175         z10 = block[8*5 + i] - block[8*3 + i];
 176         z11 = block[8*1 + i] + block[8*7 + i];
 177         z12 = block[8*1 + i] - block[8*7 + i];
 178
 179         tmp7  =          z11 + z13;
 180         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 181
 182         z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
 183         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 184         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 185
 186         tmp6 = tmp12 - tmp7;
 187         tmp5 = tmp11 - tmp6;
 188         tmp4 = tmp10 + tmp5;
 189
 190         temp[8*0 + i] = tmp0 + tmp7;
 191         temp[8*7 + i] = tmp0 - tmp7;
 192         temp[8*1 + i] = tmp1 + tmp6;
 193         temp[8*6 + i] = tmp1 - tmp6;
 194         temp[8*2 + i] = tmp2 + tmp5;
 195         temp[8*5 + i] = tmp2 - tmp5;
 196         temp[8*4 + i] = tmp3 + tmp4;
 197         temp[8*3 + i] = tmp3 - tmp4;
 198     }
 199
 200     for(i=0; i<8*8; i+=8){
 201         tmp10 = temp[0 + i] + temp[4 + i];
 202         tmp11 = temp[0 + i] - temp[4 + i];
 203
 204         tmp13 = temp[2 + i] + temp[6 + i];
 205         tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;
 206
 207         tmp0 = tmp10 + tmp13;
 208         tmp3 = tmp10 - tmp13;
 209         tmp1 = tmp11 + tmp12;
 210         tmp2 = tmp11 - tmp12;
 211
 212         z13 = temp[5 + i] + temp[3 + i];
 213         z10 = temp[5 + i] - temp[3 + i];
 214         z11 = temp[1 + i] + temp[7 + i];
 215         z12 = temp[1 + i] - temp[7 + i];
 216
 217         tmp7 = z11 + z13;
 218         tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
 219
 220         z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
 221         tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
 222         tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
 223
 224         tmp6 = tmp12 - tmp7;
 225         tmp5 = tmp11 - tmp6;
 226         tmp4 = tmp10 + tmp5;
 227
 228         block[0 + i] = (tmp0 + tmp7)>>6;
 229         block[7 + i] = (tmp0 - tmp7)>>6;
 230         block[1 + i] = (tmp1 + tmp6)>>6;
 231         block[6 + i] = (tmp1 - tmp6)>>6;
 232         block[2 + i] = (tmp2 + tmp5)>>6;
 233         block[5 + i] = (tmp2 - tmp5)>>6;
 234         block[4 + i] = (tmp3 + tmp4)>>6;
 235         block[3 + i] = (tmp3 - tmp4)>>6;
 236     }
 237 }
 238
 239 static av_cold void init_vlcs(FourXContext *f){
 240     static VLC_TYPE table[8][32][2];
 241     int i;
 242
 243     for(i=0; i<8; i++){
 244         block_type_vlc[0][i].table= table[i];
 245         block_type_vlc[0][i].table_allocated= 32;
 246         init_vlc(&block_type_vlc[0][i], BLOCK_TYPE_VLC_BITS, 7,
 247                  &block_type_tab[0][i][0][1], 2, 1,
 248                  &block_type_tab[0][i][0][0], 2, 1, INIT_VLC_USE_NEW_STATIC);
 249     }
 250 }
 251
 252 static void init_mv(FourXContext *f){
 253     int i;
 254
 255     for(i=0; i<256; i++){
 256         if(f->version>1)
 257             f->mv[i] = mv[i][0]   + mv[i][1]  *f->current_picture.linesize[0]/2;
 258         else
 259             f->mv[i] = (i&15) - 8 + ((i>>4)-8)*f->current_picture.linesize[0]/2;
 260     }
 261 }
 262
 263 static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
 264    int i;
 265    dc*= 0x10001;
 266
 267    switch(log2w){
 268    case 0:
 269         for(i=0; i<h; i++){
 270             dst[0] = scale*src[0] + dc;
 271             if(scale) src += stride;
 272             dst += stride;
 273         }
 274         break;
 275     case 1:
 276         for(i=0; i<h; i++){
 277             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 278             if(scale) src += stride;
 279             dst += stride;
 280         }
 281         break;
 282     case 2:
 283         for(i=0; i<h; i++){
 284             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 285             ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
 286             if(scale) src += stride;
 287             dst += stride;
 288         }
 289         break;
 290     case 3:
 291         for(i=0; i<h; i++){
 292             ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
 293             ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
 294             ((uint32_t*)dst)[2] = scale*((uint32_t*)src)[2] + dc;
 295             ((uint32_t*)dst)[3] = scale*((uint32_t*)src)[3] + dc;
 296             if(scale) src += stride;
 297             dst += stride;
 298         }
 299         break;
 300     default: assert(0);
 301     }
 302 }
 303
 304 static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
 305     const int index= size2index[log2h][log2w];
 306     const int h= 1<<log2h;
 307     int code= get_vlc2(&f->gb, block_type_vlc[1-(f->version>1)][index].table, BLOCK_TYPE_VLC_BITS, 1);
 308     uint16_t *start= (uint16_t*)f->last_picture.data[0];
 309     uint16_t *end= start + stride*(f->avctx->height-h+1) - (1<<log2w);
 310
 311     assert(code>=0 && code<=6);
 312
 313     if(code == 0){
 314         src += f->mv[ *f->bytestream++ ];
 315         if(start > src || src > end){
 316             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 317             return;
 318         }
 319         mcdc(dst, src, log2w, h, stride, 1, 0);
 320     }else if(code == 1){
 321         log2h--;
 322         decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
 323         decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
 324     }else if(code == 2){
 325         log2w--;
 326         decode_p_block(f, dst             , src             , log2w, log2h, stride);
 327         decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
 328     }else if(code == 3 && f->version<2){
 329         mcdc(dst, src, log2w, h, stride, 1, 0);
 330     }else if(code == 4){
 331         src += f->mv[ *f->bytestream++ ];
 332         if(start > src || src > end){
 333             av_log(f->avctx, AV_LOG_ERROR, "mv out of pic\n");
 334             return;
 335         }
 336         mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++));
 337     }else if(code == 5){
 338         mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++));
 339     }else if(code == 6){
 340         if(log2w){
 341             dst[0] = le2me_16(*f->wordstream++);
 342             dst[1] = le2me_16(*f->wordstream++);
 343         }else{
 344             dst[0     ] = le2me_16(*f->wordstream++);
 345             dst[stride] = le2me_16(*f->wordstream++);
 346         }
 347     }
 348 }
 349
 350 static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length){
 351     int x, y;
 352     const int width= f->avctx->width;
 353     const int height= f->avctx->height;
 354     uint16_t *src= (uint16_t*)f->last_picture.data[0];
 355     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 356     const int stride= f->current_picture.linesize[0]>>1;
 357     unsigned int bitstream_size, bytestream_size, wordstream_size, extra;
 358
 359     if(f->version>1){
 360         extra=20;
 361         bitstream_size= AV_RL32(buf+8);
 362         wordstream_size= AV_RL32(buf+12);
 363         bytestream_size= AV_RL32(buf+16);
 364     }else{
 365         extra=0;
 366         bitstream_size = AV_RL16(buf-4);
 367         wordstream_size= AV_RL16(buf-2);
 368         bytestream_size= FFMAX(length - bitstream_size - wordstream_size, 0);
 369     }
 370
 371     if(bitstream_size+ bytestream_size+ wordstream_size + extra != length
 372        || bitstream_size  > (1<<26)
 373        || bytestream_size > (1<<26)
 374        || wordstream_size > (1<<26)
 375        ){
 376         av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
 377         bitstream_size+ bytestream_size+ wordstream_size - length);
 378         return -1;
 379     }
 380
 381     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 382     if (!f->bitstream_buffer)
 383         return AVERROR(ENOMEM);
 384     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)(buf + extra), bitstream_size/4);
 385     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 386
 387     f->wordstream= (const uint16_t*)(buf + extra + bitstream_size);
 388     f->bytestream= buf + extra + bitstream_size + wordstream_size;
 389
 390     init_mv(f);
 391
 392     for(y=0; y<height; y+=8){
 393         for(x=0; x<width; x+=8){
 394             decode_p_block(f, dst + x, src + x, 3, 3, stride);
 395         }
 396         src += 8*stride;
 397         dst += 8*stride;
 398     }
 399
 400     if(   bitstream_size != (get_bits_count(&f->gb)+31)/32*4
 401        || (((const char*)f->wordstream - (const char*)buf + 2)&~2) != extra + bitstream_size + wordstream_size
 402        || (((const char*)f->bytestream - (const char*)buf + 3)&~3) != extra + bitstream_size + wordstream_size + bytestream_size)
 403         av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
 404             bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
 405             -(((const char*)f->bytestream - (const char*)buf + 3)&~3) + (extra + bitstream_size + wordstream_size + bytestream_size),
 406             -(((const char*)f->wordstream - (const char*)buf + 2)&~2) + (extra + bitstream_size + wordstream_size)
 407         );
 408
 409     return 0;
 410 }
 411
 412 /**
 413  * decode block and dequantize.
 414  * Note this is almost identical to MJPEG.
 415  */
 416 static int decode_i_block(FourXContext *f, DCTELEM *block){
 417     int code, i, j, level, val;
 418
 419     /* DC coef */
 420     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 421     if (val>>4){
 422         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
 423     }
 424
 425     if(val)
 426         val = get_xbits(&f->gb, val);
 427
 428     val = val * dequant_table[0] + f->last_dc;
 429     f->last_dc =
 430     block[0] = val;
 431     /* AC coefs */
 432     i = 1;
 433     for(;;) {
 434         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
 435
 436         /* EOB */
 437         if (code == 0)
 438             break;
 439         if (code == 0xf0) {
 440             i += 16;
 441         } else {
 442             level = get_xbits(&f->gb, code & 0xf);
 443             i += code >> 4;
 444             if (i >= 64) {
 445                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
 446                 return 0;
 447             }
 448
 449             j= ff_zigzag_direct[i];
 450             block[j] = level * dequant_table[j];
 451             i++;
 452             if (i >= 64)
 453                 break;
 454         }
 455     }
 456
 457     return 0;
 458 }
 459
 460 static inline void idct_put(FourXContext *f, int x, int y){
 461     DCTELEM (*block)[64]= f->block;
 462     int stride= f->current_picture.linesize[0]>>1;
 463     int i;
 464     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
 465
 466     for(i=0; i<4; i++){
 467         block[i][0] += 0x80*8*8;
 468         idct(block[i]);
 469     }
 470
 471     if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
 472         for(i=4; i<6; i++) idct(block[i]);
 473     }
 474
 475 /* Note transform is:
 476 y= ( 1b + 4g + 2r)/14
 477 cb=( 3b - 2g - 1r)/14
 478 cr=(-1b - 4g + 5r)/14
 479 */
 480     for(y=0; y<8; y++){
 481         for(x=0; x<8; x++){
 482             DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
 483             int cb= block[4][x + 8*y];
 484             int cr= block[5][x + 8*y];
 485             int cg= (cb + cr)>>1;
 486             int y;
 487
 488             cb+=cb;
 489
 490             y = temp[0];
 491             dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 492             y = temp[1];
 493             dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 494             y = temp[8];
 495             dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 496             y = temp[9];
 497             dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
 498             dst += 2;
 499         }
 500         dst += 2*stride - 2*8;
 501     }
 502 }
 503
 504 static int decode_i_mb(FourXContext *f){
 505     int i;
 506
 507     f->dsp.clear_blocks(f->block[0]);
 508
 509     for(i=0; i<6; i++){
 510         if(decode_i_block(f, f->block[i]) < 0)
 511             return -1;
 512     }
 513
 514     return 0;
 515 }
 516
 517 static const uint8_t *read_huffman_tables(FourXContext *f, const uint8_t * const buf){
 518     int frequency[512];
 519     uint8_t flag[512];
 520     int up[512];
 521     uint8_t len_tab[257];
 522     int bits_tab[257];
 523     int start, end;
 524     const uint8_t *ptr= buf;
 525     int j;
 526
 527     memset(frequency, 0, sizeof(frequency));
 528     memset(up, -1, sizeof(up));
 529
 530     start= *ptr++;
 531     end= *ptr++;
 532     for(;;){
 533         int i;
 534
 535         for(i=start; i<=end; i++){
 536             frequency[i]= *ptr++;
 537         }
 538         start= *ptr++;
 539         if(start==0) break;
 540
 541         end= *ptr++;
 542     }
 543     frequency[256]=1;
 544
 545     while((ptr - buf)&3) ptr++; // 4byte align
 546
 547     for(j=257; j<512; j++){
 548         int min_freq[2]= {256*256, 256*256};
 549         int smallest[2]= {0, 0};
 550         int i;
 551         for(i=0; i<j; i++){
 552             if(frequency[i] == 0) continue;
 553             if(frequency[i] < min_freq[1]){
 554                 if(frequency[i] < min_freq[0]){
 555                     min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
 556                     min_freq[0]= frequency[i];smallest[0]= i;
 557                 }else{
 558                     min_freq[1]= frequency[i];smallest[1]= i;
 559                 }
 560             }
 561         }
 562         if(min_freq[1] == 256*256) break;
 563
 564         frequency[j]= min_freq[0] + min_freq[1];
 565         flag[ smallest[0] ]= 0;
 566         flag[ smallest[1] ]= 1;
 567         up[ smallest[0] ]=
 568         up[ smallest[1] ]= j;
 569         frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
 570     }
 571
 572     for(j=0; j<257; j++){
 573         int node;
 574         int len=0;
 575         int bits=0;
 576
 577         for(node= j; up[node] != -1; node= up[node]){
 578             bits += flag[node]<<len;
 579             len++;
 580             if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
 581         }
 582
 583         bits_tab[j]= bits;
 584         len_tab[j]= len;
 585     }
 586
 587     init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
 588              len_tab , 1, 1,
 589              bits_tab, 4, 4, 0);
 590
 591     return ptr;
 592 }
 593
 594 static int mix(int c0, int c1){
 595     int blue = 2*(c0&0x001F) + (c1&0x001F);
 596     int green= (2*(c0&0x03E0) + (c1&0x03E0))>>5;
 597     int red  = 2*(c0>>10) + (c1>>10);
 598     return red/3*1024 + green/3*32 + blue/3;
 599 }
 600
 601 static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length){
 602     int x, y, x2, y2;
 603     const int width= f->avctx->width;
 604     const int height= f->avctx->height;
 605     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 606     const int stride= f->current_picture.linesize[0]>>1;
 607
 608     for(y=0; y<height; y+=16){
 609         for(x=0; x<width; x+=16){
 610             unsigned int color[4], bits;
 611             memset(color, 0, sizeof(color));
 612 //warning following is purely guessed ...
 613             color[0]= bytestream_get_le16(&buf);
 614             color[1]= bytestream_get_le16(&buf);
 615
 616             if(color[0]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 1\n");
 617             if(color[1]&0x8000) av_log(NULL, AV_LOG_ERROR, "unk bit 2\n");
 618
 619             color[2]= mix(color[0], color[1]);
 620             color[3]= mix(color[1], color[0]);
 621
 622             bits= bytestream_get_le32(&buf);
 623             for(y2=0; y2<16; y2++){
 624                 for(x2=0; x2<16; x2++){
 625                     int index= 2*(x2>>2) + 8*(y2>>2);
 626                     dst[y2*stride+x2]= color[(bits>>index)&3];
 627                 }
 628             }
 629             dst+=16;
 630         }
 631         dst += 16*stride - width;
 632     }
 633
 634     return 0;
 635 }
 636
 637 static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length){
 638     int x, y;
 639     const int width= f->avctx->width;
 640     const int height= f->avctx->height;
 641     uint16_t *dst= (uint16_t*)f->current_picture.data[0];
 642     const int stride= f->current_picture.linesize[0]>>1;
 643     const unsigned int bitstream_size= AV_RL32(buf);
 644     const int token_count av_unused = AV_RL32(buf + bitstream_size + 8);
 645     unsigned int prestream_size= 4*AV_RL32(buf + bitstream_size + 4);
 646     const uint8_t *prestream= buf + bitstream_size + 12;
 647
 648     if(prestream_size + bitstream_size + 12 != length
 649        || bitstream_size > (1<<26)
 650        || prestream_size > (1<<26)){
 651         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
 652         return -1;
 653     }
 654
 655     prestream= read_huffman_tables(f, prestream);
 656
 657     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
 658
 659     prestream_size= length + buf - prestream;
 660
 661     av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
 662     if (!f->bitstream_buffer)
 663         return AVERROR(ENOMEM);
 664     f->dsp.bswap_buf(f->bitstream_buffer, (const uint32_t*)prestream, prestream_size/4);
 665     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
 666
 667     f->last_dc= 0*128*8*8;
 668
 669     for(y=0; y<height; y+=16){
 670         for(x=0; x<width; x+=16){
 671             if(decode_i_mb(f) < 0)
 672                 return -1;
 673
 674             idct_put(f, x, y);
 675         }
 676         dst += 16*stride;
 677     }
 678
 679     if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
 680         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
 681
 682     return 0;
 683 }
 684
 685 static int decode_frame(AVCodecContext *avctx,
 686                         void *data, int *data_size,
 687                         AVPacket *avpkt)
 688 {
 689     const uint8_t *buf = avpkt->data;
 690     int buf_size = avpkt->size;
 691     FourXContext * const f = avctx->priv_data;
 692     AVFrame *picture = data;
 693     AVFrame *p, temp;
 694     int i, frame_4cc, frame_size;
 695
 696     frame_4cc= AV_RL32(buf);
 697     if(buf_size != AV_RL32(buf+4)+8 || buf_size < 20){
 698         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, AV_RL32(buf+4));
 699     }
 700
 701     if(frame_4cc == AV_RL32("cfrm")){
 702         int free_index=-1;
 703         const int data_size= buf_size - 20;
 704         const int id= AV_RL32(buf+12);
 705         const int whole_size= AV_RL32(buf+16);
 706         CFrameBuffer *cfrm;
 707
 708         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 709             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
 710                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
 711         }
 712
 713         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 714             if(f->cfrm[i].id   == id) break;
 715             if(f->cfrm[i].size == 0 ) free_index= i;
 716         }
 717
 718         if(i>=CFRAME_BUFFER_COUNT){
 719             i= free_index;
 720             f->cfrm[i].id= id;
 721         }
 722         cfrm= &f->cfrm[i];
 723
 724         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
 725         if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
 726             av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
 727             return -1;
 728         }
 729
 730         memcpy(cfrm->data + cfrm->size, buf+20, data_size);
 731         cfrm->size += data_size;
 732
 733         if(cfrm->size >= whole_size){
 734             buf= cfrm->data;
 735             frame_size= cfrm->size;
 736
 737             if(id != avctx->frame_number){
 738                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
 739             }
 740
 741             cfrm->size= cfrm->id= 0;
 742             frame_4cc= AV_RL32("pfrm");
 743         }else
 744             return buf_size;
 745     }else{
 746         buf= buf + 12;
 747         frame_size= buf_size - 12;
 748     }
 749
 750     temp= f->current_picture;
 751     f->current_picture= f->last_picture;
 752     f->last_picture= temp;
 753
 754     p= &f->current_picture;
 755     avctx->coded_frame= p;
 756
 757     avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management
 758
 759     if(p->data[0])
 760         avctx->release_buffer(avctx, p);
 761
 762     p->reference= 1;
 763     if(avctx->get_buffer(avctx, p) < 0){
 764         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
 765         return -1;
 766     }
 767
 768     if(frame_4cc == AV_RL32("ifr2")){
 769         p->pict_type= FF_I_TYPE;
 770         if(decode_i2_frame(f, buf-4, frame_size) < 0)
 771             return -1;
 772     }else if(frame_4cc == AV_RL32("ifrm")){
 773         p->pict_type= FF_I_TYPE;
 774         if(decode_i_frame(f, buf, frame_size) < 0)
 775             return -1;
 776     }else if(frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")){
 777         p->pict_type= FF_P_TYPE;
 778         if(decode_p_frame(f, buf, frame_size) < 0)
 779             return -1;
 780     }else if(frame_4cc == AV_RL32("snd_")){
 781         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
 782     }else{
 783         av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
 784     }
 785
 786     p->key_frame= p->pict_type == FF_I_TYPE;
 787
 788     *picture= *p;
 789     *data_size = sizeof(AVPicture);
 790
 791     emms_c();
 792
 793     return buf_size;
 794 }
 795
 796
 797 static av_cold void common_init(AVCodecContext *avctx){
 798     FourXContext * const f = avctx->priv_data;
 799
 800     dsputil_init(&f->dsp, avctx);
 801
 802     f->avctx= avctx;
 803 }
 804
 805 static av_cold int decode_init(AVCodecContext *avctx){
 806     FourXContext * const f = avctx->priv_data;
 807
 808     if(avctx->extradata_size != 4 || !avctx->extradata) {
 809         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
 810         return 1;
 811     }
 812
 813     f->version= AV_RL32(avctx->extradata)>>16;
 814     common_init(avctx);
 815     init_vlcs(f);
 816
 817     if(f->version>2) avctx->pix_fmt= PIX_FMT_RGB565;
 818     else             avctx->pix_fmt= PIX_FMT_RGB555;
 819
 820     return 0;
 821 }
 822
 823
 824 static av_cold int decode_end(AVCodecContext *avctx){
 825     FourXContext * const f = avctx->priv_data;
 826     int i;
 827
 828     av_freep(&f->bitstream_buffer);
 829     f->bitstream_buffer_size=0;
 830     for(i=0; i<CFRAME_BUFFER_COUNT; i++){
 831         av_freep(&f->cfrm[i].data);
 832         f->cfrm[i].allocated_size= 0;
 833     }
 834     free_vlc(&f->pre_vlc);
 835
 836     return 0;
 837 }
 838
 839 AVCodec fourxm_decoder = {
 840     "4xm",
 841     CODEC_TYPE_VIDEO,
 842     CODEC_ID_4XM,
 843     sizeof(FourXContext),
 844     decode_init,
 845     NULL,
 846     decode_end,
 847     decode_frame,
 848     /*CODEC_CAP_DR1,*/
 849     .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
 850 };
 851