libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  *
  20  * (SVQ1 Decoder)
  21  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  22  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  23  *
  24  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  25  */
  26
  27 /**
  28  * @file svq1.c
  29  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
   30  * For more information on the SVQ1 algorithm, visit:
  31  *   http://www.pcisys.net/~melanson/codecs/
  32  */
  33
  34
  35 //#define DEBUG_SVQ1
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <unistd.h>
  40 #include <limits.h>
  41
  42 #include "common.h"
  43 #include "avcodec.h"
  44 #include "dsputil.h"
  45 #include "mpegvideo.h"
  46 #include "bswap.h"
  47
  48 #undef NDEBUG
  49 #include <assert.h>
  50
  51 extern const uint8_t mvtab[33][2];
  52
   /* VLC decoding tables shared by all decoder instances.  They are built by
    * init_vlc() in svq1_decode_init() and read through get_vlc2().  The two
    * multistage arrays are indexed by the vector level used in
    * svq1_decode_block_intra() / svq1_decode_block_non_intra(). */
   53 static VLC svq1_block_type;
   54 static VLC svq1_motion_component;
   55 static VLC svq1_intra_multistage[6];
   56 static VLC svq1_inter_multistage[6];
   57 static VLC svq1_intra_mean;
   58 static VLC svq1_inter_mean;
  59
   /* Block types of a delta frame, as decoded from the svq1_block_type VLC
    * and dispatched on in svq1_decode_delta_block(). */
   60 #define SVQ1_BLOCK_SKIP         0
   61 #define SVQ1_BLOCK_INTER        1
   62 #define SVQ1_BLOCK_INTER_4V     2
   63 #define SVQ1_BLOCK_INTRA        3
  64
   /* Codec context taken by the encoder-side functions in this file
    * (svq1_write_header(), encode_block()).  The decoder functions in this
    * file operate on an MpegEncContext instead. */
   65 typedef struct SVQ1Context {
   66     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
   67     AVCodecContext *avctx;
   68     DSPContext dsp;
   69     AVFrame picture;
   70     AVFrame current_picture;
   71     AVFrame last_picture;
   72     PutBitContext pb;   // bit writer used by svq1_write_header()
   73     GetBitContext gb;
   74
   75     PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
   76
   77     int frame_width;    // compared against svq1_frame_size_table and written by svq1_write_header()
   78     int frame_height;
   79
   80     /* Y plane block dimensions */
   81     int y_block_width;
   82     int y_block_height;
   83
   84     /* U & V plane (C planes) block dimensions */
   85     int c_block_width;
   86     int c_block_height;
   87
   88     uint16_t *mb_type;
   89     uint32_t *dummy;
   90     int16_t (*motion_val8[3])[2];
   91     int16_t (*motion_val16[3])[2];
   92
   93     int64_t rd_total;
   94 } SVQ1Context;
  95
   96 /* motion vector (prediction) */
   /* The decoder uses the low bit of each component to select the
    * put_pixels_tab variant and the remaining bits (component >> 1) as the
    * whole-pixel displacement, see svq1_motion_inter_block(). */
   97 typedef struct svq1_pmv_s {
   98   int           x;
   99   int           y;
  100 } svq1_pmv_t;
 101
 102 #include "svq1_cb.h"
 103 #include "svq1_vlc.h"
 104
  /* Byte-wise lookup table used by svq1_packet_checksum() (and the disabled
   * svq1_component_checksum()) to update a 16-bit running checksum.
   * NOTE(review): the values look like the usual MSB-first CRC-16 table for
   * polynomial 0x1021 (entry 1 is 0x1021) -- confirm before relying on it. */
  105 static const uint16_t checksum_table[256] = {
  106   0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
  107   0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
  108   0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
  109   0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
  110   0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
  111   0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
  112   0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
  113   0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
  114   0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
  115   0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
  116   0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
  117   0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
  118   0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
  119   0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
  120   0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
  121   0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
  122   0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
  123   0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
  124   0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
  125   0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
  126   0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
  127   0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
  128   0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
  129   0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
  130   0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
  131   0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
  132   0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
  133   0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
  134   0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
  135   0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
  136   0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
  137   0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
  138 };
 139
  /* Byte substitution table used by svq1_parse_string(): it yields the
   * initial seed from the length byte and the next seed after every decoded
   * byte of the embedded message. */
  140 static const uint8_t string_table[256] = {
  141   0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
  142   0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
  143   0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
  144   0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
  145   0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
  146   0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
  147   0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
  148   0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
  149   0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
  150   0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
  151   0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
  152   0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
  153   0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
  154   0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
  155   0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
  156   0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
  157   0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
  158   0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
  159   0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
  160   0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
  161   0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
  162   0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
  163   0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
  164   0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
  165   0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
  166   0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
  167   0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
  168   0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
  169   0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
  170   0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
  171   0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
  172   0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
  173 };
 174
  /* Breadth-first subdivision step shared by the two block decoders.
   * Expands in place and relies on the caller's locals: bitbuf, list, pitch,
   * i (vector being visited), n (number of vectors queued in list), m (index
   * at which the current depth ends) and level.
   *
   * While level > 0 it reads one bit per visited vector: 0 stops the loop so
   * the caller decodes list[i] at the current level; 1 splits the vector,
   * appending two children to list -- the first at the same address, the
   * second offset by (pitch << (level/2 + 1)) when level is odd (below) or by
   * (1 << (level/2 + 1)) when level is even (to the right) -- and moves on to
   * the next vector.  When i reaches m all vectors of the current depth have
   * been visited, so m becomes n and level is decremented; at level 0 no
   * further split bits are read. */
  175 #define SVQ1_PROCESS_VECTOR()\
  176     for (; level > 0; i++) {\
  177       /* process next depth */\
  178       if (i == m) {\
  179         m = n;\
  180         if (--level == 0)\
  181           break;\
  182       }\
  183       /* divide block if next bit set */\
  184       if (get_bits (bitbuf, 1) == 0)\
  185         break;\
  186       /* add child nodes */\
  187       list[n++] = list[i];\
  188       list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
  189     }
 190
  /* Adds `stages` codebook words to four pixels at once and clips the result.
   * Relies on the caller's locals j, stages, entries, codebook, n1, n2, n3.
   *
   * The four pixels are kept as two 32-bit words with two 16-bit lanes each:
   * n1 accumulates the bytes selected by 0xFF00FF00 (shifted down by 8), n2
   * the bytes selected by 0x00FF00FF.  Each codebook word is XORed with
   * 0x80808080 before being added, which adds a bias of 128 per byte and per
   * stage; SVQ1_CALC_CODEBOOK_ENTRIES() removes that bias from the mean
   * beforehand.  Lanes that left the 8-bit range are then saturated so the
   * caller can recombine them as (n1 << 8) | n2. */
  191 #define SVQ1_ADD_CODEBOOK()\
  192           /* add codebook entries to vector */\
  193           for (j=0; j < stages; j++) {\
  194             n3  = codebook[entries[j]] ^ 0x80808080;\
  195             n1 += ((n3 & 0xFF00FF00) >> 8);\
  196             n2 +=  (n3 & 0x00FF00FF);\
  197           }\
  198 \
  199           /* clip to [0..255] */\
  200           if (n1 & 0xFF00FF00) {\
  201             n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  202             n1 += 0x7F007F00;\
  203             n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  204             n1 &= (n3 & 0x00FF00FF);\
  205           }\
  206 \
  207           if (n2 & 0xFF00FF00) {\
  208             n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  209             n2 += 0x7F007F00;\
  210             n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  211             n2 &= (n3 & 0x00FF00FF);\
  212           }
 213
  /* Reconstructs an intra vector: every group of four pixels starts from the
   * packed mean n4 in both lane words, gets the codebook words added by
   * SVQ1_ADD_CODEBOOK() and is stored as one 32-bit word.  Relies on the
   * caller's locals x, y, width, height, pitch, dst, codebook, n1, n2, n4;
   * codebook advances by one word per group and dst by pitch/4 words per
   * row. */
  214 #define SVQ1_DO_CODEBOOK_INTRA()\
  215       for (y=0; y < height; y++) {\
  216         for (x=0; x < (width / 4); x++, codebook++) {\
  217         n1 = n4;\
  218         n2 = n4;\
  219         SVQ1_ADD_CODEBOOK()\
  220         /* store result */\
  221         dst[x] = (n1 << 8) | n2;\
  222         }\
  223         dst += (pitch / 4);\
  224       }
 225
  /* Reconstructs a non-intra vector: same as SVQ1_DO_CODEBOOK_INTRA() except
   * that each group of four pixels starts from the pixels already stored in
   * dst (split into the two lane words) plus the packed mean n4, so the
   * decoded vector is added on top of the existing picture content.  Also
   * uses the caller's local n3 as a temporary. */
  226 #define SVQ1_DO_CODEBOOK_NONINTRA()\
  227       for (y=0; y < height; y++) {\
  228         for (x=0; x < (width / 4); x++, codebook++) {\
  229         n3 = dst[x];\
  230         /* add mean value to vector */\
  231         n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
  232         n2 =  (n3 & 0x00FF00FF)          + n4;\
  233         SVQ1_ADD_CODEBOOK()\
  234         /* store result */\
  235         dst[x] = (n1 << 8) | n2;\
  236         }\
  237         dst += (pitch / 4);\
  238       }
 239
  /* Prepares the inputs of the SVQ1_DO_CODEBOOK_*() macros for one vector.
   * Relies on the caller's locals bitbuf, level, stages, mean, j, entries,
   * codebook, bit_cache and n4.
   *
   * Selects the codebook for `level`, reads 4 bits per stage, and turns stage
   * j's 4-bit index into a word offset, (index + 16*j) << (level + 1), into
   * that codebook.  The mean is reduced by 128 per stage to cancel the bias
   * introduced by the XOR in SVQ1_ADD_CODEBOOK(), then replicated into both
   * 16-bit lanes of n4.  For a negative mean the upper lane holds mean - 1,
   * presumably to absorb the carry out of the lower lane when n4 is added. */
  240 #define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
  241       codebook = (const uint32_t *) cbook[level];\
  242       bit_cache = get_bits (bitbuf, 4*stages);\
  243       /* calculate codebook entries for this vector */\
  244       for (j=0; j < stages; j++) {\
  245         entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
  246       }\
  247       mean -= (stages * 128);\
  248       n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 249
 250 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 251   uint32_t    bit_cache;
 252   uint8_t    *list[63];
 253   uint32_t   *dst;
 254   const uint32_t *codebook;
 255   int         entries[6];
 256   int         i, j, m, n;
 257   int         mean, stages;
 258   unsigned    x, y, width, height, level;
 259   uint32_t    n1, n2, n3, n4;
 260
 261   /* initialize list for breadth first processing of vectors */
 262   list[0] = pixels;
 263
 264   /* recursively process vector */
 265   for (i=0, m=1, n=1, level=5; i < n; i++) {
 266     SVQ1_PROCESS_VECTOR();
 267
 268     /* destination address and vector size */
 269     dst = (uint32_t *) list[i];
 270     width = 1 << ((4 + level) /2);
 271     height = 1 << ((3 + level) /2);
 272
 273     /* get number of stages (-1 skips vector, 0 for mean only) */
 274     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 275
 276     if (stages == -1) {
 277         for (y=0; y < height; y++) {
 278           memset (&dst[y*(pitch / 4)], 0, width);
 279         }
 280       continue;                 /* skip vector */
 281     }
 282
 283     if ((stages > 0) && (level >= 4)) {
 284 #ifdef DEBUG_SVQ1
 285     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 286 #endif
 287       return -1;        /* invalid vector */
 288     }
 289
 290     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 291
 292     if (stages == 0) {
 293       for (y=0; y < height; y++) {
 294         memset (&dst[y*(pitch / 4)], mean, width);
 295       }
 296     } else {
 297       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 298       SVQ1_DO_CODEBOOK_INTRA()
 299     }
 300   }
 301
 302   return 0;
 303 }
 304
 305 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 306   uint32_t    bit_cache;
 307   uint8_t    *list[63];
 308   uint32_t   *dst;
 309   const uint32_t *codebook;
 310   int         entries[6];
 311   int         i, j, m, n;
 312   int         mean, stages;
 313   int         x, y, width, height, level;
 314   uint32_t    n1, n2, n3, n4;
 315
 316   /* initialize list for breadth first processing of vectors */
 317   list[0] = pixels;
 318
 319   /* recursively process vector */
 320   for (i=0, m=1, n=1, level=5; i < n; i++) {
 321     SVQ1_PROCESS_VECTOR();
 322
 323     /* destination address and vector size */
 324     dst = (uint32_t *) list[i];
 325     width = 1 << ((4 + level) /2);
 326     height = 1 << ((3 + level) /2);
 327
 328     /* get number of stages (-1 skips vector, 0 for mean only) */
 329     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 330
 331     if (stages == -1) continue; /* skip vector */
 332
 333     if ((stages > 0) && (level >= 4)) {
 334 #ifdef DEBUG_SVQ1
 335     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 336 #endif
 337       return -1;        /* invalid vector */
 338     }
 339
 340     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 341
 342     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 343     SVQ1_DO_CODEBOOK_NONINTRA()
 344   }
 345   return 0;
 346 }
 347
 348 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 349   int        diff;
 350   int        i;
 351
 352   for (i=0; i < 2; i++) {
 353
 354     /* get motion code */
 355     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 356     if(diff<0)
 357         return -1;
 358     else if(diff){
 359         if(get_bits1(bitbuf)) diff= -diff;
 360     }
 361
 362     /* add median of motion vector predictors and clip result */
 363     if (i == 1)
 364       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 365     else
 366       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 367   }
 368
 369   return 0;
 370 }
 371
 372 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
 373   uint8_t *src;
 374   uint8_t *dst;
 375   int      i;
 376
 377   src = &previous[x + y*pitch];
 378   dst = current;
 379
 380   for (i=0; i < 16; i++) {
 381     memcpy (dst, src, 16);
 382     src += pitch;
 383     dst += pitch;
 384   }
 385 }
 386
 387 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 388                                uint8_t *current, uint8_t *previous, int pitch,
 389                                svq1_pmv_t *motion, int x, int y) {
 390   uint8_t    *src;
 391   uint8_t    *dst;
 392   svq1_pmv_t  mv;
 393   svq1_pmv_t *pmv[3];
 394   int         result;
 395
 396   /* predict and decode motion vector */
 397   pmv[0] = &motion[0];
 398   if (y == 0) {
 399     pmv[1] =
 400     pmv[2] = pmv[0];
 401   }
 402   else {
 403     pmv[1] = &motion[(x / 8) + 2];
 404     pmv[2] = &motion[(x / 8) + 4];
 405   }
 406
 407   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 408
 409   if (result != 0)
 410     return result;
 411
 412   motion[0].x                =
 413   motion[(x / 8) + 2].x      =
 414   motion[(x / 8) + 3].x      = mv.x;
 415   motion[0].y                =
 416   motion[(x / 8) + 2].y      =
 417   motion[(x / 8) + 3].y      = mv.y;
 418
 419   if(y + (mv.y >> 1)<0)
 420      mv.y= 0;
 421   if(x + (mv.x >> 1)<0)
 422      mv.x= 0;
 423
 424 #if 0
 425   int w= (s->width+15)&~15;
 426   int h= (s->height+15)&~15;
 427   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 428       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 429 #endif
 430
 431   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 432   dst = current;
 433
 434   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 435
 436   return 0;
 437 }
 438
  /* Decode the four motion vectors of an INTER_4V block and write the four
   * motion-compensated 8x8 predictions.
   *
   * Vector 0 is kept in the local mv, vector 1 is stored in motion[0],
   * vectors 2 and 3 in motion[(x / 8) + 2] and motion[(x / 8) + 3].  The pmv
   * array is reused both as the predictor list passed to
   * svq1_decode_motion_vector() (entries 0..2) and, after the last decode, as
   * the list of the four decoded vectors (entries 0..3).  Because the
   * predictors point into the motion array that is being updated, the order
   * of the statements below matters.
   *
   * Returns 0 on success, otherwise the error returned by
   * svq1_decode_motion_vector(). */
  439 static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
  440                                   uint8_t *current, uint8_t *previous, int pitch,
  441                                   svq1_pmv_t *motion,int x, int y) {
  442   uint8_t    *src;
  443   uint8_t    *dst;
  444   svq1_pmv_t  mv;
  445   svq1_pmv_t *pmv[4];
  446   int         i, result;
  447
  448   /* predict and decode motion vector (0) */
  449   pmv[0] = &motion[0];
  450   if (y == 0) {
  451     pmv[1] =
  452     pmv[2] = pmv[0];
  453   }
  454   else {
  455     pmv[1] = &motion[(x / 8) + 2];
  456     pmv[2] = &motion[(x / 8) + 4];
  457   }
  458
  459   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
  460
  461   if (result != 0)
  462     return result;
  463
  464   /* predict and decode motion vector (1) */
  /* below the first row pmv[2] is left as set for vector (0) */
  465   pmv[0] = &mv;
  466   if (y == 0) {
  467     pmv[1] =
  468     pmv[2] = pmv[0];
  469   }
  470   else {
  471     pmv[1] = &motion[(x / 8) + 3];
  472   }
  473   result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);
  474
  475   if (result != 0)
  476     return result;
  477
  478   /* predict and decode motion vector (2) */
  479   pmv[1] = &motion[0];
  480   pmv[2] = &motion[(x / 8) + 1];
  481
  482   result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);
  483
  484   if (result != 0)
  485     return result;
  486
  487   /* predict and decode motion vector (3) */
  488   pmv[2] = &motion[(x / 8) + 2];
  489   pmv[3] = &motion[(x / 8) + 3];
  490
  491   result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);
  492
  493   if (result != 0)
  494     return result;
  495
  496   /* form predictions */
  /* pmv[i] is the vector of quadrant i; adding 16 to a component moves the
   * source by 8 whole pixels because the low bit only selects the
   * put_pixels_tab variant */
  497   for (i=0; i < 4; i++) {
  498     int mvx= pmv[i]->x + (i&1)*16;
  499     int mvy= pmv[i]->y + (i>>1)*16;
  500
  501     ///XXX /FIXME clipping or padding?
  502     if(y + (mvy >> 1)<0)
  503        mvy= 0;
  504     if(x + (mvx >> 1)<0)
  505        mvx= 0;
  506
  507 #if 0
  508   int w= (s->width+15)&~15;
  509   int h= (s->height+15)&~15;
  510   if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
  511       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
  512 #endif
  513     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
  514     dst = current;
  515
  516     s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
  517
  518     /* select next block */
  /* after a right-hand quadrant: 8 rows down and 8 pixels back to the left */
  519     if (i & 1) {
  520       current  += 8*(pitch - 1);
  521     } else {
  522       current  += 8;
  523     }
  524   }
  525
  526   return 0;
  527 }
 528
 529 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 530                         uint8_t *current, uint8_t *previous, int pitch,
 531                         svq1_pmv_t *motion, int x, int y) {
 532   uint32_t block_type;
 533   int      result = 0;
 534
 535   /* get block type */
 536   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 537
 538   /* reset motion vectors */
 539   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 540     motion[0].x                 =
 541     motion[0].y                 =
 542     motion[(x / 8) + 2].x =
 543     motion[(x / 8) + 2].y =
 544     motion[(x / 8) + 3].x =
 545     motion[(x / 8) + 3].y = 0;
 546   }
 547
 548   switch (block_type) {
 549   case SVQ1_BLOCK_SKIP:
 550     svq1_skip_block (current, previous, pitch, x, y);
 551     break;
 552
 553   case SVQ1_BLOCK_INTER:
 554     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 555
 556     if (result != 0)
 557     {
 558 #ifdef DEBUG_SVQ1
 559     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 560 #endif
 561       break;
 562     }
 563     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 564     break;
 565
 566   case SVQ1_BLOCK_INTER_4V:
 567     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 568
 569     if (result != 0)
 570     {
 571 #ifdef DEBUG_SVQ1
 572     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 573 #endif
 574       break;
 575     }
 576     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 577     break;
 578
 579   case SVQ1_BLOCK_INTRA:
 580     result = svq1_decode_block_intra (bitbuf, current, pitch);
 581     break;
 582   }
 583
 584   return result;
 585 }
 586
  587 /* standard video sizes */
  /* Indexed by the 3-bit frame size code of the frame header.  Code 7 is not
   * looked up: svq1_decode_frame_header() reads explicit 12-bit dimensions
   * for it and svq1_write_header() writes them, so the last entry is only a
   * placeholder. */
  588 static struct { int width; int height; } svq1_frame_size_table[8] = {
  589   { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
  590   { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
  591 };
 592
 593 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 594   int i;
 595
 596   for (i=0; i < length; i++) {
 597     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 598   }
 599
 600   return value;
 601 }
 602
  /* Disabled code: checksum over a width x height area using the same table
   * update as svq1_packet_checksum().
   * NOTE(review): pixels is a uint16_t pointer, so pixels[x] ^ (value >> 8)
   * can exceed the 256 entries of checksum_table -- fix before re-enabling. */
  603 #if 0 /* unused, remove? */
  604 static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
  605                                          int width, int height, int value) {
  606   int x, y;
  607
  608   for (y=0; y < height; y++) {
  609     for (x=0; x < width; x++) {
  610       value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
  611     }
  612
  613     pixels += pitch;
  614   }
  615
  616   return value;
  617 }
  618 #endif
 619
 620 #ifdef CONFIG_DECODERS
 621 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 622   uint8_t seed;
 623   int     i;
 624
 625   out[0] = get_bits (bitbuf, 8);
 626
 627   seed = string_table[out[0]];
 628
 629   for (i=1; i <= out[0]; i++) {
 630     out[i] = get_bits (bitbuf, 8) ^ seed;
 631     seed   = string_table[out[i] ^ seed];
 632   }
 633 }
 634
 635 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 636   int frame_size_code;
 637   int temporal_reference;
 638
 639   temporal_reference = get_bits (bitbuf, 8);
 640
 641   /* frame type */
 642   s->pict_type= get_bits (bitbuf, 2)+1;
 643   if(s->pict_type==4)
 644       return -1;
 645
 646   if (s->pict_type == I_TYPE) {
 647
 648     /* unknown fields */
 649     if (s->f_code == 0x50 || s->f_code == 0x60) {
 650       int csum = get_bits (bitbuf, 16);
 651
 652       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 653
 654 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 655 //              (csum == 0) ? "correct" : "incorrect", csum);
 656     }
 657
 658     if ((s->f_code ^ 0x10) >= 0x50) {
 659       uint8_t msg[256];
 660
 661       svq1_parse_string (bitbuf, msg);
 662
 663       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 664     }
 665
 666     skip_bits (bitbuf, 2);
 667     skip_bits (bitbuf, 2);
 668     skip_bits1 (bitbuf);
 669
 670     /* load frame size */
 671     frame_size_code = get_bits (bitbuf, 3);
 672
 673     if (frame_size_code == 7) {
 674       /* load width, height (12 bits each) */
 675       s->width = get_bits (bitbuf, 12);
 676       s->height = get_bits (bitbuf, 12);
 677
 678       if (!s->width || !s->height)
 679         return -1;
 680     } else {
 681       /* get width, height from table */
 682       s->width = svq1_frame_size_table[frame_size_code].width;
 683       s->height = svq1_frame_size_table[frame_size_code].height;
 684     }
 685   }
 686
 687   /* unknown fields */
 688   if (get_bits (bitbuf, 1) == 1) {
 689     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 690     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 691
 692     if (get_bits (bitbuf, 2) != 0)
 693       return -1;
 694   }
 695
 696   if (get_bits (bitbuf, 1) == 1) {
 697     skip_bits1 (bitbuf);
 698     skip_bits (bitbuf, 4);
 699     skip_bits1 (bitbuf);
 700     skip_bits (bitbuf, 2);
 701
 702     while (get_bits (bitbuf, 1) == 1) {
 703       skip_bits (bitbuf, 8);
 704     }
 705   }
 706
 707   return 0;
 708 }
 709
 710 static int svq1_decode_frame(AVCodecContext *avctx,
 711                              void *data, int *data_size,
 712                              uint8_t *buf, int buf_size)
 713 {
 714   MpegEncContext *s=avctx->priv_data;
 715   uint8_t        *current, *previous;
 716   int             result, i, x, y, width, height;
 717   AVFrame *pict = data;
 718
 719   /* initialize bit buffer */
 720   init_get_bits(&s->gb,buf,buf_size*8);
 721
 722   /* decode frame header */
 723   s->f_code = get_bits (&s->gb, 22);
 724
 725   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 726     return -1;
 727
 728   /* swap some header bytes (why?) */
 729   if (s->f_code != 0x20) {
 730     uint32_t *src = (uint32_t *) (buf + 4);
 731
 732     for (i=0; i < 4; i++) {
 733       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 734     }
 735   }
 736
 737   result = svq1_decode_frame_header (&s->gb, s);
 738
 739   if (result != 0)
 740   {
 741 #ifdef DEBUG_SVQ1
 742     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 743 #endif
 744     return result;
 745   }
 746
 747   //FIXME this avoids some confusion for "B frames" without 2 references
 748   //this should be removed after libavcodec can handle more flexible picture types & ordering
 749   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 750
 751   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 752   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
 753      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
 754      || avctx->skip_frame >= AVDISCARD_ALL)
 755       return buf_size;
 756
 757   if(MPV_frame_start(s, avctx) < 0)
 758       return -1;
 759
 760   /* decode y, u and v components */
 761   for (i=0; i < 3; i++) {
 762     int linesize;
 763     if (i == 0) {
 764       width  = (s->width+15)&~15;
 765       height = (s->height+15)&~15;
 766       linesize= s->linesize;
 767     } else {
 768       if(s->flags&CODEC_FLAG_GRAY) break;
 769       width  = (s->width/4+15)&~15;
 770       height = (s->height/4+15)&~15;
 771       linesize= s->uvlinesize;
 772     }
 773
 774     current  = s->current_picture.data[i];
 775
 776     if(s->pict_type==B_TYPE){
 777         previous = s->next_picture.data[i];
 778     }else{
 779         previous = s->last_picture.data[i];
 780     }
 781
 782     if (s->pict_type == I_TYPE) {
 783       /* keyframe */
 784       for (y=0; y < height; y+=16) {
 785         for (x=0; x < width; x+=16) {
 786           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 787           if (result != 0)
 788           {
 789 //#ifdef DEBUG_SVQ1
 790             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 791 //#endif
 792             return result;
 793           }
 794         }
 795         current += 16*linesize;
 796       }
 797     } else {
 798       svq1_pmv_t pmv[width/8+3];
 799       /* delta frame */
 800       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 801
 802       for (y=0; y < height; y+=16) {
 803         for (x=0; x < width; x+=16) {
 804           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 805                                             linesize, pmv, x, y);
 806           if (result != 0)
 807           {
 808 #ifdef DEBUG_SVQ1
 809     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 810 #endif
 811             return result;
 812           }
 813         }
 814
 815         pmv[0].x =
 816         pmv[0].y = 0;
 817
 818         current += 16*linesize;
 819       }
 820     }
 821   }
 822
 823   *pict = *(AVFrame*)&s->current_picture;
 824
 825
 826   MPV_frame_end(s);
 827
 828   *data_size=sizeof(AVFrame);
 829   return buf_size;
 830 }
 831
 832 static int svq1_decode_init(AVCodecContext *avctx)
 833 {
 834     MpegEncContext *s = avctx->priv_data;
 835     int i;
 836
 837     MPV_decode_defaults(s);
 838
 839     s->avctx = avctx;
 840     s->width = (avctx->width+3)&~3;
 841     s->height = (avctx->height+3)&~3;
 842     s->codec_id= avctx->codec->id;
 843     avctx->pix_fmt = PIX_FMT_YUV410P;
 844     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 845     s->flags= avctx->flags;
 846     if (MPV_common_init(s) < 0) return -1;
 847
 848     init_vlc(&svq1_block_type, 2, 4,
 849         &svq1_block_type_vlc[0][1], 2, 1,
 850         &svq1_block_type_vlc[0][0], 2, 1, 1);
 851
 852     init_vlc(&svq1_motion_component, 7, 33,
 853         &mvtab[0][1], 2, 1,
 854         &mvtab[0][0], 2, 1, 1);
 855
 856     for (i = 0; i < 6; i++) {
 857         init_vlc(&svq1_intra_multistage[i], 3, 8,
 858             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 859             &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
 860         init_vlc(&svq1_inter_multistage[i], 3, 8,
 861             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 862             &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
 863     }
 864
 865     init_vlc(&svq1_intra_mean, 8, 256,
 866         &svq1_intra_mean_vlc[0][1], 4, 2,
 867         &svq1_intra_mean_vlc[0][0], 4, 2, 1);
 868
 869     init_vlc(&svq1_inter_mean, 9, 512,
 870         &svq1_inter_mean_vlc[0][1], 4, 2,
 871         &svq1_inter_mean_vlc[0][0], 4, 2, 1);
 872
 873     return 0;
 874 }
 875
 876 static int svq1_decode_end(AVCodecContext *avctx)
 877 {
 878     MpegEncContext *s = avctx->priv_data;
 879
 880     MPV_common_end(s);
 881     return 0;
 882 }
 883 #endif /* CONFIG_DECODERS */
 884
 885 #ifdef CONFIG_ENCODERS
 886 static void svq1_write_header(SVQ1Context *s, int frame_type)
 887 {
 888     int i;
 889
 890     /* frame code */
 891     put_bits(&s->pb, 22, 0x20);
 892
 893     /* temporal reference (sure hope this is a "don't care") */
 894     put_bits(&s->pb, 8, 0x00);
 895
 896     /* frame type */
 897     put_bits(&s->pb, 2, frame_type - 1);
 898
 899     if (frame_type == I_TYPE) {
 900
 901         /* no checksum since frame code is 0x20 */
 902
 903         /* no embedded string either */
 904
 905         /* output 5 unknown bits (2 + 2 + 1) */
 906         put_bits(&s->pb, 5, 0);
 907
 908         for (i = 0; i < 7; i++)
 909         {
 910             if ((svq1_frame_size_table[i].width == s->frame_width) &&
 911                 (svq1_frame_size_table[i].height == s->frame_height))
 912             {
 913                 put_bits(&s->pb, 3, i);
 914                 break;
 915             }
 916         }
 917
 918         if (i == 7)
 919         {
 920             put_bits(&s->pb, 3, 7);
 921                 put_bits(&s->pb, 12, s->frame_width);
 922                 put_bits(&s->pb, 12, s->frame_height);
 923         }
 924     }
 925
 926     /* no checksum or extra data (next 2 bits get 0) */
 927     put_bits(&s->pb, 2, 0);
 928 }
 929
 930
  /* Encoder tuning constants.  NOTE(review): they are not referenced in the
   * part of the file reviewed here; their users are further down. */
  931 #define QUALITY_THRESHOLD 100
  932 #define THRESHOLD_MULTIPLIER 0.6
  933
  /* encode_block() declares a local variable named "vector"; presumably the
   * AltiVec headers define a macro of that name, hence the #undef. */
  934 #if defined(HAVE_ALTIVEC)
  935 #undef vector
  936 #endif
 937
 938 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 939     int count, y, x, i, j, split, best_mean, best_score, best_count;
 940     int best_vector[6];
 941     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 942     int w= 2<<((level+2)>>1);
 943     int h= 2<<((level+1)>>1);
 944     int size=w*h;
 945     int16_t block[7][256];
 946     const int8_t *codebook_sum, *codebook;
 947     const uint16_t (*mean_vlc)[2];
 948     const uint8_t (*multistage_vlc)[2];
 949
 950     best_score=0;
 951     //FIXME optimize, this doenst need to be done multiple times
 952     if(intra){
 953         codebook_sum= svq1_intra_codebook_sum[level];
 954         codebook= svq1_intra_codebooks[level];
 955         mean_vlc= svq1_intra_mean_vlc;
 956         multistage_vlc= svq1_intra_multistage_vlc[level];
 957         for(y=0; y<h; y++){
 958             for(x=0; x<w; x++){
 959                 int v= src[x + y*stride];
 960                 block[0][x + w*y]= v;
 961                 best_score += v*v;
 962                 block_sum[0] += v;
 963             }
 964         }
 965     }else{
 966         codebook_sum= svq1_inter_codebook_sum[level];
 967         codebook= svq1_inter_codebooks[level];
 968         mean_vlc= svq1_inter_mean_vlc + 256;
 969         multistage_vlc= svq1_inter_multistage_vlc[level];
 970         for(y=0; y<h; y++){
 971             for(x=0; x<w; x++){
 972                 int v= src[x + y*stride] - ref[x + y*stride];
 973                 block[0][x + w*y]= v;
 974                 best_score += v*v;
 975                 block_sum[0] += v;
 976             }
 977         }
 978     }
 979
 980     best_count=0;
 981     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 982     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 983
 984     if(level<4){
 985         for(count=1; count<7; count++){
 986             int best_vector_score= INT_MAX;
 987             int best_vector_sum=-999, best_vector_mean=-999;
 988             const int stage= count-1;
 989             const int8_t *vector;
 990
 991             for(i=0; i<16; i++){
 992                 int sum= codebook_sum[stage*16 + i];
 993                 int sqr=0;
 994                 int diff, mean, score;
 995
 996                 vector = codebook + stage*size*16 + i*size;
 997
 998                 for(j=0; j<size; j++){
 999                     int v= vector[j];
1000                     sqr += (v - block[stage][j])*(v - block[stage][j]);
1001                 }
1002                 diff= block_sum[stage] - sum;
1003                 mean= (diff + (size>>1)) >> (level+3);
1004                 assert(mean >-300 && mean<300);
1005                 if(intra) mean= clip(mean, 0, 255);
1006                 else      mean= clip(mean, -256, 255);
1007                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1008                 if(score < best_vector_score){
1009                     best_vector_score= score;
1010                     best_vector[stage]= i;
1011                     best_vector_sum= sum;
1012                     best_vector_mean= mean;
1013                 }
1014             }
1015             assert(best_vector_mean != -999);
1016             vector= codebook + stage*size*16 + best_vector[stage]*size;
1017             for(j=0; j<size; j++){
1018                 block[stage+1][j] = block[stage][j] - vector[j];
1019             }
1020             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1021             best_vector_score +=
1022                 lambda*(+ 1 + 4*count
1023                         + multistage_vlc[1+count][1]
1024                         + mean_vlc[best_vector_mean][1]);
1025
1026             if(best_vector_score < best_score){
1027                 best_score= best_vector_score;
1028                 best_count= count;
1029                 best_mean= best_vector_mean;
1030             }
1031         }
1032     }
1033
1034     split=0;
1035     if(best_score > threshold && level){
1036         int score=0;
1037         int offset= (level&1) ? stride*h/2 : w/2;
1038         PutBitContext backup[6];
1039
1040         for(i=level-1; i>=0; i--){
1041             backup[i]= s->reorder_pb[i];
1042         }
1043         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1044         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1045         score += lambda;
1046
1047         if(score < best_score){
1048             best_score= score;
1049             split=1;
1050         }else{
1051             for(i=level-1; i>=0; i--){
1052                 s->reorder_pb[i]= backup[i];
1053             }
1054         }
1055     }
1056     if (level > 0)
1057         put_bits(&s->reorder_pb[level], 1, split);
1058
1059     if(!split){
1060         assert((best_mean >= 0 && best_mean<256) || !intra);
1061         assert(best_mean >= -256 && best_mean<256);
1062         assert(best_count >=0 && best_count<7);
1063         assert(level<4 || best_count==0);
1064
1065         /* output the encoding */
1066         put_bits(&s->reorder_pb[level],
1067             multistage_vlc[1 + best_count][1],
1068             multistage_vlc[1 + best_count][0]);
1069         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1070             mean_vlc[best_mean][0]);
1071
1072         for (i = 0; i < best_count; i++){
1073             assert(best_vector[i]>=0 && best_vector[i]<16);
1074             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1075         }
1076
1077         for(y=0; y<h; y++){
1078             for(x=0; x<w; x++){
1079                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1080             }
1081         }
1082     }
1083
1084     return best_score;
1085 }
1086
1087
1088 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1089     int width, int height, int src_stride, int stride)
1090 {
1091     int x, y;
1092     int i;
1093     int block_width, block_height;
1094     int level;
1095     int threshold[6];
1096     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1097
1098     /* figure out the acceptable level thresholds in advance */
1099     threshold[5] = QUALITY_THRESHOLD;
1100     for (level = 4; level >= 0; level--)
1101         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1102
1103     block_width = (width + 15) / 16;
1104     block_height = (height + 15) / 16;
1105
1106     if(s->picture.pict_type == P_TYPE){
1107         s->m.avctx= s->avctx;
1108         s->m.current_picture_ptr= &s->m.current_picture;
1109         s->m.last_picture_ptr   = &s->m.last_picture;
1110         s->m.last_picture.data[0]= ref_plane;
1111         s->m.linesize=
1112         s->m.last_picture.linesize[0]=
1113         s->m.new_picture.linesize[0]=
1114         s->m.current_picture.linesize[0]= stride;
1115         s->m.width= width;
1116         s->m.height= height;
1117         s->m.mb_width= block_width;
1118         s->m.mb_height= block_height;
1119         s->m.mb_stride= s->m.mb_width+1;
1120         s->m.b8_stride= 2*s->m.mb_width+1;
1121         s->m.f_code=1;
1122         s->m.pict_type= s->picture.pict_type;
1123         s->m.me_method= s->avctx->me_method;
1124         s->m.me.scene_change_score=0;
1125         s->m.flags= s->avctx->flags;
1126 //        s->m.out_format = FMT_H263;
1127 //        s->m.unrestricted_mv= 1;
1128
1129         s->m.lambda= s->picture.quality;
1130         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1131         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1132
1133         if(!s->motion_val8[plane]){
1134             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
1135             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
1136         }
1137
1138         s->m.mb_type= s->mb_type;
1139
1140         //dummies, to avoid segfaults
1141         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1142         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1143         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1144         s->m.current_picture.mb_type= s->dummy;
1145
1146         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
1147         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
1148         s->m.dsp= s->dsp; //move
1149         ff_init_me(&s->m);
1150
1151         s->m.me.dia_size= s->avctx->dia_size;
1152         s->m.first_slice_line=1;
1153         for (y = 0; y < block_height; y++) {
1154             uint8_t src[stride*16];
1155
1156             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1157             s->m.mb_y= y;
1158
1159             for(i=0; i<16 && i + 16*y<height; i++){
1160                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1161                 for(x=width; x<16*block_width; x++)
1162                     src[i*stride+x]= src[i*stride+x-1];
1163             }
1164             for(; i<16 && i + 16*y<16*block_height; i++)
1165                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1166
1167             for (x = 0; x < block_width; x++) {
1168                 s->m.mb_x= x;
1169                 ff_init_block_index(&s->m);
1170                 ff_update_block_index(&s->m);
1171
1172                 ff_estimate_p_frame_motion(&s->m, x, y);
1173             }
1174             s->m.first_slice_line=0;
1175         }
1176
1177         ff_fix_long_p_mvs(&s->m);
1178         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1179     }
1180
1181     s->m.first_slice_line=1;
1182     for (y = 0; y < block_height; y++) {
1183         uint8_t src[stride*16];
1184
1185         for(i=0; i<16 && i + 16*y<height; i++){
1186             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1187             for(x=width; x<16*block_width; x++)
1188                 src[i*stride+x]= src[i*stride+x-1];
1189         }
1190         for(; i<16 && i + 16*y<16*block_height; i++)
1191             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1192
1193         s->m.mb_y= y;
1194         for (x = 0; x < block_width; x++) {
1195             uint8_t reorder_buffer[3][6][7*32];
1196             int count[3][6];
1197             int offset = y * 16 * stride + x * 16;
1198             uint8_t *decoded= decoded_plane + offset;
1199             uint8_t *ref= ref_plane + offset;
1200             int score[4]={0,0,0,0}, best;
1201             uint8_t temp[16*stride];
1202
1203             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
1204                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1205                 return -1;
1206             }
1207
1208             s->m.mb_x= x;
1209             ff_init_block_index(&s->m);
1210             ff_update_block_index(&s->m);
1211
1212             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1213                 for(i=0; i<6; i++){
1214                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1215                 }
1216                 if(s->picture.pict_type == P_TYPE){
1217                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1218                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1219                     score[0]= vlc[1]*lambda;
1220                 }
1221                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1222                 for(i=0; i<6; i++){
1223                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1224                     flush_put_bits(&s->reorder_pb[i]);
1225                 }
1226             }else
1227                 score[0]= INT_MAX;
1228
1229             best=0;
1230
1231             if(s->picture.pict_type == P_TYPE){
1232                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1233                 int mx, my, pred_x, pred_y, dxy;
1234                 int16_t *motion_ptr;
1235
1236                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1237                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1238                     for(i=0; i<6; i++)
1239                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1240
1241                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1242
1243                     s->m.pb= s->reorder_pb[5];
1244                     mx= motion_ptr[0];
1245                     my= motion_ptr[1];
1246                     assert(mx>=-32 && mx<=31);
1247                     assert(my>=-32 && my<=31);
1248                     assert(pred_x>=-32 && pred_x<=31);
1249                     assert(pred_y>=-32 && pred_y<=31);
1250                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1251                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1252                     s->reorder_pb[5]= s->m.pb;
1253                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1254
1255                     dxy= (mx&1) + 2*(my&1);
1256
1257                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1258
1259                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1260                     best= score[1] <= score[0];
1261
1262                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1263                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1264                     score[2]+= vlc[1]*lambda;
1265                     if(score[2] < score[best] && mx==0 && my==0){
1266                         best=2;
1267                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1268                         for(i=0; i<6; i++){
1269                             count[2][i]=0;
1270                         }
1271                         put_bits(&s->pb, vlc[1], vlc[0]);
1272                     }
1273                 }
1274
1275                 if(best==1){
1276                     for(i=0; i<6; i++){
1277                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1278                         flush_put_bits(&s->reorder_pb[i]);
1279                     }
1280                 }else{
1281                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1282                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1283                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1284                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1285                 }
1286             }
1287
1288             s->rd_total += score[best];
1289
1290             for(i=5; i>=0; i--){
1291                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1292             }
1293             if(best==0){
1294                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1295             }
1296         }
1297         s->m.first_slice_line=0;
1298     }
1299     return 0;
1300 }
1301
1302 static int svq1_encode_init(AVCodecContext *avctx)
1303 {
1304     SVQ1Context * const s = avctx->priv_data;
1305
1306     dsputil_init(&s->dsp, avctx);
1307     avctx->coded_frame= (AVFrame*)&s->picture;
1308
1309     s->frame_width = avctx->width;
1310     s->frame_height = avctx->height;
1311
1312     s->y_block_width = (s->frame_width + 15) / 16;
1313     s->y_block_height = (s->frame_height + 15) / 16;
1314
1315     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1316     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1317
1318     s->avctx= avctx;
1319     s->m.avctx= avctx;
1320     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1321     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1322     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1323     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1324     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1325     h263_encode_init(&s->m); //mv_penalty
1326
1327     return 0;
1328 }
1329
1330 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1331     int buf_size, void *data)
1332 {
1333     SVQ1Context * const s = avctx->priv_data;
1334     AVFrame *pict = data;
1335     AVFrame * const p= (AVFrame*)&s->picture;
1336     AVFrame temp;
1337     int i;
1338
1339     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1340         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1341         return -1;
1342     }
1343
1344     if(!s->current_picture.data[0]){
1345         avctx->get_buffer(avctx, &s->current_picture);
1346         avctx->get_buffer(avctx, &s->last_picture);
1347     }
1348
1349     temp= s->current_picture;
1350     s->current_picture= s->last_picture;
1351     s->last_picture= temp;
1352
1353     init_put_bits(&s->pb, buf, buf_size);
1354
1355     *p = *pict;
1356     p->pict_type = avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1357     p->key_frame = p->pict_type == I_TYPE;
1358
1359     svq1_write_header(s, p->pict_type);
1360     for(i=0; i<3; i++){
1361         if(svq1_encode_plane(s, i,
1362             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1363             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1364             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
1365                 return -1;
1366     }
1367
1368 //    align_put_bits(&s->pb);
1369     while(put_bits_count(&s->pb) & 31)
1370         put_bits(&s->pb, 1, 0);
1371
1372     flush_put_bits(&s->pb);
1373
1374     return (put_bits_count(&s->pb) / 8);
1375 }
1376
1377 static int svq1_encode_end(AVCodecContext *avctx)
1378 {
1379     SVQ1Context * const s = avctx->priv_data;
1380     int i;
1381
1382     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1383
1384     av_freep(&s->m.me.scratchpad);
1385     av_freep(&s->m.me.map);
1386     av_freep(&s->m.me.score_map);
1387     av_freep(&s->mb_type);
1388     av_freep(&s->dummy);
1389
1390     for(i=0; i<3; i++){
1391         av_freep(&s->motion_val8[i]);
1392         av_freep(&s->motion_val16[i]);
1393     }
1394
1395     return 0;
1396 }
1397
1398 #endif //CONFIG_ENCODERS
1399
1400 #ifdef CONFIG_DECODERS
/*
 * Codec table entry for the "svq1" decoder.
 * The leading fields are positional; NOTE(review): presumably name, media
 * type, codec id, private context size, init / encode / close / decode
 * callbacks and capability flags, in that order -- confirm against the
 * AVCodec declaration.
 */
AVCodec svq1_decoder = {
    "svq1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_SVQ1,
    sizeof(MpegEncContext),   /* the decoder's private context is a plain MpegEncContext */
    svq1_decode_init,
    NULL,                     /* slot that the encoder entry fills with svq1_encode_frame */
    svq1_decode_end,
    svq1_decode_frame,
    CODEC_CAP_DR1,
    .flush= ff_mpeg_flush,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},   /* list terminated by -1 */
};
1414 #endif
1415
1416 #ifdef CONFIG_ENCODERS
1417
/*
 * Codec table entry for the "svq1" encoder.
 * The leading fields are positional; NOTE(review): presumably name, media
 * type, codec id, private context size, then the init / encode / close
 * callbacks -- confirm against the AVCodec declaration.
 */
AVCodec svq1_encoder = {
    "svq1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_SVQ1,
    sizeof(SVQ1Context),      /* private context used by all svq1_encode_* functions */
    svq1_encode_init,
    svq1_encode_frame,
    svq1_encode_end,
    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},   /* list terminated by -1 */
};
1428
1429 #endif //CONFIG_ENCODERS