libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * (SVQ1 Decoder)
  21  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  22  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  23  *
  24  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  25  */
  26
  27 /**
  28  * @file svq1.c
  29  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
  30  * For more information of the SVQ1 algorithm, visit:
  31  *   http://www.pcisys.net/~melanson/codecs/
  32  */
  33
  34
  35 //#define DEBUG_SVQ1
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39 #include <unistd.h>
  40 #include <limits.h>
  41
  42 #include "common.h"
  43 #include "avcodec.h"
  44 #include "dsputil.h"
  45 #include "mpegvideo.h"
  46 #include "bswap.h"
  47
  48 #undef NDEBUG
  49 #include <assert.h>
  50
/* Motion vector VLC description table; defined outside this file and used
 * by svq1_decode_init() to build svq1_motion_component. */
extern const uint8_t mvtab[33][2];

/* VLC decoding tables, built once by svq1_decode_init(). */
static VLC svq1_block_type;
static VLC svq1_motion_component;
static VLC svq1_intra_multistage[6]; /* indexed by vector level */
static VLC svq1_inter_multistage[6]; /* indexed by vector level */
static VLC svq1_intra_mean;
static VLC svq1_inter_mean;
  59
/* Block types as returned by the svq1_block_type VLC; dispatched on in
 * svq1_decode_delta_block(). */
#define SVQ1_BLOCK_SKIP         0
#define SVQ1_BLOCK_INTER        1
#define SVQ1_BLOCK_INTER_4V     2
#define SVQ1_BLOCK_INTRA        3
  64
/**
 * Context used by the encoder-side functions visible in this file
 * (svq1_write_header(), encode_block()). The decoder functions in this
 * file use a plain MpegEncContext as their private data instead.
 */
typedef struct SVQ1Context {
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
    AVCodecContext *avctx;
    DSPContext dsp;
    AVFrame picture;
    AVFrame current_picture;
    AVFrame last_picture;
    PutBitContext pb;           /* output bitstream; written by svq1_write_header() */
    GetBitContext gb;

    PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex

    /* written into the frame header as 12-bit fields by svq1_write_header() */
    int frame_width;
    int frame_height;

    /* Y plane block dimensions */
    int y_block_width;
    int y_block_height;

    /* U & V plane (C planes) block dimensions */
    int c_block_width;
    int c_block_height;

    uint16_t *mb_type;
    uint32_t *dummy;
    int16_t (*motion_val8[3])[2];
    int16_t (*motion_val16[3])[2];

    int64_t rd_total;
} SVQ1Context;
  95
/* motion vector (prediction); the decoder uses the low bit of each
 * component to select a put_pixels variant and the remaining bits
 * (value >> 1) as a whole-pixel offset */
typedef struct svq1_pmv_s {
  int            x;
  int            y;
} svq1_pmv_t;
 101
 102 #include "svq1_cb.h"
 103 #include "svq1_vlc.h"
 104
/*
 * 256-entry lookup table driving svq1_packet_checksum() and
 * svq1_component_checksum(): the index is (input ^ (value >> 8)).
 * NOTE(review): entry 1 is 0x1021, so this looks like a byte-wise CRC-16
 * table for the CCITT polynomial -- confirm before relying on that.
 */
static const uint16_t checksum_table[256] = {
  0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
  0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
  0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
  0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
  0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
  0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
  0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
  0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
  0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
  0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
  0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
  0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
  0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
  0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
  0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
  0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
  0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
  0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
  0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
  0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
  0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
  0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
  0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
  0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
  0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
  0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
  0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
  0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
  0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
  0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
  0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
  0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
 139
/*
 * 256-entry byte substitution table used by svq1_parse_string(): it yields
 * the initial XOR seed from the length byte and the next seed after every
 * decoded character.
 */
static const uint8_t string_table[256] = {
  0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
  0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
  0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
  0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
  0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
  0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
  0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
  0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
  0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
  0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
  0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
  0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
  0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
  0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
  0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
  0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
  0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
  0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
  0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
  0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
  0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
  0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
  0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
  0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
  0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
  0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
  0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
  0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
  0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
  0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
  0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
  0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
};
 174
/*
 * Split step of the breadth-first vector walk. Relies on these locals of
 * the expanding function: i (current list index), m (index at which the
 * current level ends), n (number of list entries), level, list[], pitch
 * and bitbuf.
 *
 * While level > 0 it reads one bit per node: a 0 bit stops at node i
 * (which is then decoded at the current level); a 1 bit appends two child
 * nodes to list[] and moves on to the next node. Whenever i reaches m the
 * level is decremented, and level 0 ends the walk. The second child is
 * offset from the first by pitch (odd level) or 1 (even level), shifted
 * left by (level / 2) + 1.
 */
#define SVQ1_PROCESS_VECTOR()\
    for (; level > 0; i++) {\
      /* process next depth */\
      if (i == m) {\
        m = n;\
        if (--level == 0)\
          break;\
      }\
      /* divide block if next bit set */\
      if (get_bits (bitbuf, 1) == 0)\
        break;\
      /* add child nodes */\
      list[n++] = list[i];\
      list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
    }
 190
/*
 * Accumulates the selected codebook words of all stages into n1/n2 and
 * then clips. Relies on locals j, stages, entries[], codebook, n1, n2, n3.
 *
 * Each 32-bit codebook word holds four bytes; n1 sums the bytes at bits
 * 8-15 and 24-31, n2 the bytes at bits 0-7 and 16-23, so each accumulator
 * carries two values in separate 16-bit lanes. The XOR with 0x80808080
 * flips the top bit of every byte before it is added (the matching
 * "mean -= stages * 128" lives in SVQ1_CALC_CODEBOOK_ENTRIES).
 * The clip step only runs when some lane has bits set above its low byte.
 */
#define SVQ1_ADD_CODEBOOK()\
          /* add codebook entries to vector */\
          for (j=0; j < stages; j++) {\
            n3  = codebook[entries[j]] ^ 0x80808080;\
            n1 += ((n3 & 0xFF00FF00) >> 8);\
            n2 +=  (n3 & 0x00FF00FF);\
          }\
\
          /* clip to [0..255] */\
          if (n1 & 0xFF00FF00) {\
            n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n1 += 0x7F007F00;\
            n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n1 &= (n3 & 0x00FF00FF);\
          }\
\
          if (n2 & 0xFF00FF00) {\
            n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n2 += 0x7F007F00;\
            n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
            n2 &= (n3 & 0x00FF00FF);\
          }
 213
/*
 * Intra reconstruction of one vector: for every row and every 32-bit word
 * (width / 4 words per row) both accumulators start from the packed mean
 * n4, the codebook contribution is added via SVQ1_ADD_CODEBOOK, and the
 * two accumulators are re-interleaved into dst[x]. The codebook pointer
 * advances by one word per output word; dst advances by pitch / 4 words
 * per row. Relies on locals x, y, width, height, dst, pitch, codebook,
 * n1, n2, n4.
 */
#define SVQ1_DO_CODEBOOK_INTRA()\
      for (y=0; y < height; y++) {\
        for (x=0; x < (width / 4); x++, codebook++) {\
        n1 = n4;\
        n2 = n4;\
        SVQ1_ADD_CODEBOOK()\
        /* store result */\
        dst[x] = (n1 << 8) | n2;\
        }\
        dst += (pitch / 4);\
      }
 225
/*
 * Non-intra reconstruction of one vector: same walk as
 * SVQ1_DO_CODEBOOK_INTRA, but the accumulators start from the pixels
 * already present in dst[x] (split into the two 16-bit-lane accumulators)
 * plus the packed mean n4, so the result is added onto the existing
 * picture content. Relies on locals x, y, width, height, dst, pitch,
 * codebook, n1, n2, n3, n4.
 */
#define SVQ1_DO_CODEBOOK_NONINTRA()\
      for (y=0; y < height; y++) {\
        for (x=0; x < (width / 4); x++, codebook++) {\
        n3 = dst[x];\
        /* add mean value to vector */\
        n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
        n2 =  (n3 & 0x00FF00FF)   + n4;\
        SVQ1_ADD_CODEBOOK()\
        /* store result */\
        dst[x] = (n1 << 8) | n2;\
        }\
        dst += (pitch / 4);\
      }
 239
/*
 * Prepares codebook decoding for one vector. Relies on locals codebook,
 * level, bit_cache, bitbuf, stages, entries[], j, mean, n4.
 *
 * Selects the codebook for the current level from cbook, reads 4 bits per
 * stage in one get_bits() call, and turns each 4-bit index (first stage in
 * the most significant nibble) into a word offset into the codebook:
 * (index + 16 * stage) << (level + 1). Finally the mean is reduced by
 * 128 per stage and replicated into both 16-bit lanes of n4; the
 * (mean >> 31) term adjusts the upper lane when mean is negative.
 */
#define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
      codebook = (const uint32_t *) cbook[level];\
      bit_cache = get_bits (bitbuf, 4*stages);\
      /* calculate codebook entries for this vector */\
      for (j=0; j < stages; j++) {\
        entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
      }\
      mean -= (stages * 128);\
      n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 249
 250 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 251   uint32_t    bit_cache;
 252   uint8_t    *list[63];
 253   uint32_t   *dst;
 254   const uint32_t *codebook;
 255   int         entries[6];
 256   int         i, j, m, n;
 257   int         mean, stages;
 258   unsigned    x, y, width, height, level;
 259   uint32_t    n1, n2, n3, n4;
 260
 261   /* initialize list for breadth first processing of vectors */
 262   list[0] = pixels;
 263
 264   /* recursively process vector */
 265   for (i=0, m=1, n=1, level=5; i < n; i++) {
 266     SVQ1_PROCESS_VECTOR();
 267
 268     /* destination address and vector size */
 269     dst = (uint32_t *) list[i];
 270     width = 1 << ((4 + level) /2);
 271     height = 1 << ((3 + level) /2);
 272
 273     /* get number of stages (-1 skips vector, 0 for mean only) */
 274     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 275
 276     if (stages == -1) {
 277         for (y=0; y < height; y++) {
 278           memset (&dst[y*(pitch / 4)], 0, width);
 279         }
 280       continue;         /* skip vector */
 281     }
 282
 283     if ((stages > 0) && (level >= 4)) {
 284 #ifdef DEBUG_SVQ1
 285     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 286 #endif
 287       return -1;        /* invalid vector */
 288     }
 289
 290     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 291
 292     if (stages == 0) {
 293       for (y=0; y < height; y++) {
 294         memset (&dst[y*(pitch / 4)], mean, width);
 295       }
 296     } else {
 297       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 298       SVQ1_DO_CODEBOOK_INTRA()
 299     }
 300   }
 301
 302   return 0;
 303 }
 304
 305 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 306   uint32_t    bit_cache;
 307   uint8_t    *list[63];
 308   uint32_t   *dst;
 309   const uint32_t *codebook;
 310   int         entries[6];
 311   int         i, j, m, n;
 312   int         mean, stages;
 313   int         x, y, width, height, level;
 314   uint32_t    n1, n2, n3, n4;
 315
 316   /* initialize list for breadth first processing of vectors */
 317   list[0] = pixels;
 318
 319   /* recursively process vector */
 320   for (i=0, m=1, n=1, level=5; i < n; i++) {
 321     SVQ1_PROCESS_VECTOR();
 322
 323     /* destination address and vector size */
 324     dst = (uint32_t *) list[i];
 325     width = 1 << ((4 + level) /2);
 326     height = 1 << ((3 + level) /2);
 327
 328     /* get number of stages (-1 skips vector, 0 for mean only) */
 329     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 330
 331     if (stages == -1) continue; /* skip vector */
 332
 333     if ((stages > 0) && (level >= 4)) {
 334 #ifdef DEBUG_SVQ1
 335     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 336 #endif
 337       return -1;        /* invalid vector */
 338     }
 339
 340     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 341
 342     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 343     SVQ1_DO_CODEBOOK_NONINTRA()
 344   }
 345   return 0;
 346 }
 347
 348 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 349   int         diff;
 350   int         i;
 351
 352   for (i=0; i < 2; i++) {
 353
 354     /* get motion code */
 355     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 356     if(diff<0)
 357         return -1;
 358     else if(diff){
 359         if(get_bits1(bitbuf)) diff= -diff;
 360     }
 361
 362     /* add median of motion vector predictors and clip result */
 363     if (i == 1)
 364       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 365     else
 366       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 367   }
 368
 369   return 0;
 370 }
 371
 372 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
 373   uint8_t *src;
 374   uint8_t *dst;
 375   int      i;
 376
 377   src = &previous[x + y*pitch];
 378   dst = current;
 379
 380   for (i=0; i < 16; i++) {
 381     memcpy (dst, src, 16);
 382     src += pitch;
 383     dst += pitch;
 384   }
 385 }
 386
 387 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 388                                uint8_t *current, uint8_t *previous, int pitch,
 389                                svq1_pmv_t *motion, int x, int y) {
 390   uint8_t    *src;
 391   uint8_t    *dst;
 392   svq1_pmv_t  mv;
 393   svq1_pmv_t *pmv[3];
 394   int         result;
 395
 396   /* predict and decode motion vector */
 397   pmv[0] = &motion[0];
 398   if (y == 0) {
 399     pmv[1] =
 400     pmv[2] = pmv[0];
 401   }
 402   else {
 403     pmv[1] = &motion[(x / 8) + 2];
 404     pmv[2] = &motion[(x / 8) + 4];
 405   }
 406
 407   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 408
 409   if (result != 0)
 410     return result;
 411
 412   motion[0].x           =
 413   motion[(x / 8) + 2].x =
 414   motion[(x / 8) + 3].x = mv.x;
 415   motion[0].y           =
 416   motion[(x / 8) + 2].y =
 417   motion[(x / 8) + 3].y = mv.y;
 418
 419   if(y + (mv.y >> 1)<0)
 420      mv.y= 0;
 421   if(x + (mv.x >> 1)<0)
 422      mv.x= 0;
 423
 424 #if 0
 425   int w= (s->width+15)&~15;
 426   int h= (s->height+15)&~15;
 427   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 428       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 429 #endif
 430
 431   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 432   dst = current;
 433
 434   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 435
 436   return 0;
 437 }
 438
/**
 * Decode the four motion vectors of a 16x16 "4V" inter block and
 * motion-compensate its four 8x8 quadrants from the reference picture.
 *
 * The statement order matters: the predictor pointers in pmv[] are
 * re-targeted between the four decode calls, and several of them point
 * into motion[], which the decode calls themselves overwrite.
 *
 * @param s        codec context (only its dsp.put_pixels_tab is used here)
 * @param bitbuf   bit reader
 * @param current  destination, pointing at the block's top-left pixel
 * @param previous origin of the reference plane
 * @param pitch    distance in bytes between two rows
 * @param motion   motion vector predictor array of the current plane
 * @param x        horizontal block position in pixels
 * @param y        vertical block position in pixels
 * @return 0 on success, otherwise the error from svq1_decode_motion_vector()
 */
static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
                                  uint8_t *current, uint8_t *previous, int pitch,
                                  svq1_pmv_t *motion,int x, int y) {
  uint8_t    *src;
  uint8_t    *dst;
  svq1_pmv_t  mv;
  svq1_pmv_t *pmv[4];
  int         i, result;

  /* predict and decode motion vector (0) */
  pmv[0] = &motion[0];
  if (y == 0) {
    pmv[1] =
    pmv[2] = pmv[0];
  }
  else {
    pmv[1] = &motion[(x / 8) + 2];
    pmv[2] = &motion[(x / 8) + 4];
  }

  /* vector (0) goes into the local mv */
  result = svq1_decode_motion_vector (bitbuf, &mv, pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (1) */
  pmv[0] = &mv;
  if (y == 0) {
    pmv[1] =
    pmv[2] = pmv[0];
  }
  else {
    /* pmv[2] keeps the value assigned for vector (0) */
    pmv[1] = &motion[(x / 8) + 3];
  }
  /* vector (1) is stored in motion[0] */
  result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (2) */
  pmv[1] = &motion[0];
  pmv[2] = &motion[(x / 8) + 1];

  /* vector (2) is stored in motion[(x / 8) + 2] */
  result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);

  if (result != 0)
    return result;

  /* predict and decode motion vector (3) */
  pmv[2] = &motion[(x / 8) + 2];
  pmv[3] = &motion[(x / 8) + 3];

  /* vector (3) is stored in motion[(x / 8) + 3]; pmv[0..3] now point at vectors (0)..(3) */
  result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);

  if (result != 0)
    return result;

  /* form predictions */
  for (i=0; i < 4; i++) {
    /* add the quadrant offset (8 pixels) in the same half-pixel units as the vector */
    int mvx= pmv[i]->x + (i&1)*16;
    int mvy= pmv[i]->y + (i>>1)*16;

    ///XXX /FIXME clipping or padding?
    if(y + (mvy >> 1)<0)
       mvy= 0;
    if(x + (mvx >> 1)<0)
       mvx= 0;

#if 0
  int w= (s->width+15)&~15;
  int h= (s->height+15)&~15;
  if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
      av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
#endif
    src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
    dst = current;

    /* low bits of the vector select the put_pixels variant */
    s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);

    /* select next block */
    if (i & 1) {
      /* from a right quadrant: 8 rows down and 8 pixels back to the left */
      current  += 8*(pitch - 1);
    } else {
      current  += 8;
    }
  }

  return 0;
}
 528
 529 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 530                         uint8_t *current, uint8_t *previous, int pitch,
 531                         svq1_pmv_t *motion, int x, int y) {
 532   uint32_t block_type;
 533   int      result = 0;
 534
 535   /* get block type */
 536   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 537
 538   /* reset motion vectors */
 539   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 540     motion[0].x           =
 541     motion[0].y           =
 542     motion[(x / 8) + 2].x =
 543     motion[(x / 8) + 2].y =
 544     motion[(x / 8) + 3].x =
 545     motion[(x / 8) + 3].y = 0;
 546   }
 547
 548   switch (block_type) {
 549   case SVQ1_BLOCK_SKIP:
 550     svq1_skip_block (current, previous, pitch, x, y);
 551     break;
 552
 553   case SVQ1_BLOCK_INTER:
 554     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 555
 556     if (result != 0)
 557     {
 558 #ifdef DEBUG_SVQ1
 559     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 560 #endif
 561       break;
 562     }
 563     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 564     break;
 565
 566   case SVQ1_BLOCK_INTER_4V:
 567     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 568
 569     if (result != 0)
 570     {
 571 #ifdef DEBUG_SVQ1
 572     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 573 #endif
 574       break;
 575     }
 576     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 577     break;
 578
 579   case SVQ1_BLOCK_INTRA:
 580     result = svq1_decode_block_intra (bitbuf, current, pitch);
 581     break;
 582   }
 583
 584   return result;
 585 }
 586
 587 /* standard video sizes */
 588 static struct { int width; int height; } svq1_frame_size_table[8] = {
 589   { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
 590   { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
 591 };
 592
 593 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 594   int i;
 595
 596   for (i=0; i < length; i++) {
 597     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 598   }
 599
 600   return value;
 601 }
 602
 603 static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
 604                                          int width, int height, int value) {
 605   int x, y;
 606
 607   for (y=0; y < height; y++) {
 608     for (x=0; x < width; x++) {
 609       value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 610     }
 611
 612     pixels += pitch;
 613   }
 614
 615   return value;
 616 }
 617
 618 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 619   uint8_t seed;
 620   int     i;
 621
 622   out[0] = get_bits (bitbuf, 8);
 623
 624   seed = string_table[out[0]];
 625
 626   for (i=1; i <= out[0]; i++) {
 627     out[i] = get_bits (bitbuf, 8) ^ seed;
 628     seed   = string_table[out[i] ^ seed];
 629   }
 630 }
 631
 632 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 633   int frame_size_code;
 634   int temporal_reference;
 635
 636   temporal_reference = get_bits (bitbuf, 8);
 637
 638   /* frame type */
 639   s->pict_type= get_bits (bitbuf, 2)+1;
 640   if(s->pict_type==4)
 641       return -1;
 642
 643   if (s->pict_type == I_TYPE) {
 644
 645     /* unknown fields */
 646     if (s->f_code == 0x50 || s->f_code == 0x60) {
 647       int csum = get_bits (bitbuf, 16);
 648
 649       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 650
 651 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 652 //              (csum == 0) ? "correct" : "incorrect", csum);
 653     }
 654
 655     if ((s->f_code ^ 0x10) >= 0x50) {
 656       char msg[256];
 657
 658       svq1_parse_string (bitbuf, (char *) msg);
 659
 660       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 661     }
 662
 663     skip_bits (bitbuf, 2);
 664     skip_bits (bitbuf, 2);
 665     skip_bits1 (bitbuf);
 666
 667     /* load frame size */
 668     frame_size_code = get_bits (bitbuf, 3);
 669
 670     if (frame_size_code == 7) {
 671       /* load width, height (12 bits each) */
 672       s->width = get_bits (bitbuf, 12);
 673       s->height = get_bits (bitbuf, 12);
 674
 675       if (!s->width || !s->height)
 676         return -1;
 677     } else {
 678       /* get width, height from table */
 679       s->width = svq1_frame_size_table[frame_size_code].width;
 680       s->height = svq1_frame_size_table[frame_size_code].height;
 681     }
 682   }
 683
 684   /* unknown fields */
 685   if (get_bits (bitbuf, 1) == 1) {
 686     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 687     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 688
 689     if (get_bits (bitbuf, 2) != 0)
 690       return -1;
 691   }
 692
 693   if (get_bits (bitbuf, 1) == 1) {
 694     skip_bits1 (bitbuf);
 695     skip_bits (bitbuf, 4);
 696     skip_bits1 (bitbuf);
 697     skip_bits (bitbuf, 2);
 698
 699     while (get_bits (bitbuf, 1) == 1) {
 700       skip_bits (bitbuf, 8);
 701     }
 702   }
 703
 704   return 0;
 705 }
 706
 707 static int svq1_decode_frame(AVCodecContext *avctx,
 708                              void *data, int *data_size,
 709                              uint8_t *buf, int buf_size)
 710 {
 711   MpegEncContext *s=avctx->priv_data;
 712   uint8_t      *current, *previous;
 713   int           result, i, x, y, width, height;
 714   AVFrame *pict = data;
 715
 716   if(buf==NULL && buf_size==0){
 717       return 0;
 718   }
 719
 720   /* initialize bit buffer */
 721   init_get_bits(&s->gb,buf,buf_size*8);
 722
 723   /* decode frame header */
 724   s->f_code = get_bits (&s->gb, 22);
 725
 726   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 727     return -1;
 728
 729   /* swap some header bytes (why?) */
 730   if (s->f_code != 0x20) {
 731     uint32_t *src = (uint32_t *) (buf + 4);
 732
 733     for (i=0; i < 4; i++) {
 734       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 735     }
 736   }
 737
 738   result = svq1_decode_frame_header (&s->gb, s);
 739
 740   if (result != 0)
 741   {
 742 #ifdef DEBUG_SVQ1
 743     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 744 #endif
 745     return result;
 746   }
 747
 748   //FIXME this avoids some confusion for "B frames" without 2 references
 749   //this should be removed after libavcodec can handle more flexible picture types & ordering
 750   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 751
 752   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 753
 754   if(MPV_frame_start(s, avctx) < 0)
 755       return -1;
 756
 757   /* decode y, u and v components */
 758   for (i=0; i < 3; i++) {
 759     int linesize;
 760     if (i == 0) {
 761       width  = (s->width+15)&~15;
 762       height = (s->height+15)&~15;
 763       linesize= s->linesize;
 764     } else {
 765       if(s->flags&CODEC_FLAG_GRAY) break;
 766       width  = (s->width/4+15)&~15;
 767       height = (s->height/4+15)&~15;
 768       linesize= s->uvlinesize;
 769     }
 770
 771     current  = s->current_picture.data[i];
 772
 773     if(s->pict_type==B_TYPE){
 774         previous = s->next_picture.data[i];
 775     }else{
 776         previous = s->last_picture.data[i];
 777     }
 778
 779     if (s->pict_type == I_TYPE) {
 780       /* keyframe */
 781       for (y=0; y < height; y+=16) {
 782         for (x=0; x < width; x+=16) {
 783           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 784           if (result != 0)
 785           {
 786 //#ifdef DEBUG_SVQ1
 787             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 788 //#endif
 789             return result;
 790           }
 791         }
 792         current += 16*linesize;
 793       }
 794     } else {
 795       svq1_pmv_t pmv[width/8+3];
 796       /* delta frame */
 797       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 798
 799       for (y=0; y < height; y+=16) {
 800         for (x=0; x < width; x+=16) {
 801           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 802                                             linesize, pmv, x, y);
 803           if (result != 0)
 804           {
 805 #ifdef DEBUG_SVQ1
 806     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 807 #endif
 808             return result;
 809           }
 810         }
 811
 812         pmv[0].x =
 813         pmv[0].y = 0;
 814
 815         current += 16*linesize;
 816       }
 817     }
 818   }
 819
 820   *pict = *(AVFrame*)&s->current_picture;
 821
 822
 823   MPV_frame_end(s);
 824
 825   *data_size=sizeof(AVFrame);
 826   return buf_size;
 827 }
 828
 829 static int svq1_decode_init(AVCodecContext *avctx)
 830 {
 831     MpegEncContext *s = avctx->priv_data;
 832     int i;
 833
 834     MPV_decode_defaults(s);
 835
 836     s->avctx = avctx;
 837     s->width = (avctx->width+3)&~3;
 838     s->height = (avctx->height+3)&~3;
 839     s->codec_id= avctx->codec->id;
 840     avctx->pix_fmt = PIX_FMT_YUV410P;
 841     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 842     s->flags= avctx->flags;
 843     if (MPV_common_init(s) < 0) return -1;
 844
 845     init_vlc(&svq1_block_type, 2, 4,
 846         &svq1_block_type_vlc[0][1], 2, 1,
 847         &svq1_block_type_vlc[0][0], 2, 1);
 848
 849     init_vlc(&svq1_motion_component, 7, 33,
 850         &mvtab[0][1], 2, 1,
 851         &mvtab[0][0], 2, 1);
 852
 853     for (i = 0; i < 6; i++) {
 854         init_vlc(&svq1_intra_multistage[i], 3, 8,
 855             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 856             &svq1_intra_multistage_vlc[i][0][0], 2, 1);
 857         init_vlc(&svq1_inter_multistage[i], 3, 8,
 858             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 859             &svq1_inter_multistage_vlc[i][0][0], 2, 1);
 860     }
 861
 862     init_vlc(&svq1_intra_mean, 8, 256,
 863         &svq1_intra_mean_vlc[0][1], 4, 2,
 864         &svq1_intra_mean_vlc[0][0], 4, 2);
 865
 866     init_vlc(&svq1_inter_mean, 9, 512,
 867         &svq1_inter_mean_vlc[0][1], 4, 2,
 868         &svq1_inter_mean_vlc[0][0], 4, 2);
 869
 870     return 0;
 871 }
 872
 873 static int svq1_decode_end(AVCodecContext *avctx)
 874 {
 875     MpegEncContext *s = avctx->priv_data;
 876
 877     MPV_common_end(s);
 878     return 0;
 879 }
 880
 881 static void svq1_write_header(SVQ1Context *s, int frame_type)
 882 {
 883     /* frame code */
 884     put_bits(&s->pb, 22, 0x20);
 885
 886     /* temporal reference (sure hope this is a "don't care") */
 887     put_bits(&s->pb, 8, 0x00);
 888
 889     /* frame type */
 890     put_bits(&s->pb, 2, frame_type - 1);
 891
 892     if (frame_type == I_TYPE) {
 893
 894         /* no checksum since frame code is 0x20 */
 895
 896         /* no embedded string either */
 897
 898         /* output 5 unknown bits (2 + 2 + 1) */
 899         put_bits(&s->pb, 5, 0);
 900
 901         /* forget about matching up resolutions, just use the free-form
 902          * resolution code (7) for now */
 903         put_bits(&s->pb, 3, 7);
 904         put_bits(&s->pb, 12, s->frame_width);
 905         put_bits(&s->pb, 12, s->frame_height);
 906
 907     }
 908
 909     /* no checksum or extra data (next 2 bits get 0) */
 910     put_bits(&s->pb, 2, 0);
 911 }
 912
 913
/* Encoder tuning constants. Their use is not visible in this part of the
 * file -- presumably they drive the block split decision; TODO confirm. */
#define QUALITY_THRESHOLD 100
#define THRESHOLD_MULTIPLIER 0.6

/* NOTE(review): presumably AltiVec builds define `vector` as a macro,
 * which would clash with the local variable named `vector` in
 * encode_block() -- confirm. */
#if defined(HAVE_ALTIVEC)
#undef vector
#endif
 920
 921 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 922     int count, y, x, i, j, split, best_mean, best_score, best_count;
 923     int best_vector[6];
 924     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 925     int w= 2<<((level+2)>>1);
 926     int h= 2<<((level+1)>>1);
 927     int size=w*h;
 928     int16_t block[7][256];
 929     const int8_t *codebook_sum, *codebook;
 930     const uint16_t (*mean_vlc)[2];
 931     const uint8_t (*multistage_vlc)[2];
 932
 933     best_score=0;
 934     //FIXME optimize, this doenst need to be done multiple times
 935     if(intra){
 936         codebook_sum= svq1_intra_codebook_sum[level];
 937         codebook= svq1_intra_codebooks[level];
 938         mean_vlc= svq1_intra_mean_vlc;
 939         multistage_vlc= svq1_intra_multistage_vlc[level];
 940         for(y=0; y<h; y++){
 941             for(x=0; x<w; x++){
 942                 int v= src[x + y*stride];
 943                 block[0][x + w*y]= v;
 944                 best_score += v*v;
 945                 block_sum[0] += v;
 946             }
 947         }
 948     }else{
 949         codebook_sum= svq1_inter_codebook_sum[level];
 950         codebook= svq1_inter_codebooks[level];
 951         mean_vlc= svq1_inter_mean_vlc + 256;
 952         multistage_vlc= svq1_inter_multistage_vlc[level];
 953         for(y=0; y<h; y++){
 954             for(x=0; x<w; x++){
 955                 int v= src[x + y*stride] - ref[x + y*stride];
 956                 block[0][x + w*y]= v;
 957                 best_score += v*v;
 958                 block_sum[0] += v;
 959             }
 960         }
 961     }
 962
 963     best_count=0;
 964     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 965     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 966
 967     if(level<4){
 968         for(count=1; count<7; count++){
 969             int best_vector_score= INT_MAX;
 970             int best_vector_sum=-999, best_vector_mean=-999;
 971             const int stage= count-1;
 972             const int8_t *vector;
 973
 974             for(i=0; i<16; i++){
 975                 int sum= codebook_sum[stage*16 + i];
 976                 int sqr=0;
 977                 int diff, mean, score;
 978
 979                 vector = codebook + stage*size*16 + i*size;
 980
 981                 for(j=0; j<size; j++){
 982                     int v= vector[j];
 983                     sqr += (v - block[stage][j])*(v - block[stage][j]);
 984                 }
 985                 diff= block_sum[stage] - sum;
 986                 mean= (diff + (size>>1)) >> (level+3);
 987                 assert(mean >-300 && mean<300);
 988                 if(intra) mean= clip(mean, 0, 255);
 989                 else      mean= clip(mean, -256, 255);
 990                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
 991                 if(score < best_vector_score){
 992                     best_vector_score= score;
 993                     best_vector[stage]= i;
 994                     best_vector_sum= sum;
 995                     best_vector_mean= mean;
 996                 }
 997             }
 998             assert(best_vector_mean != -999);
 999             vector= codebook + stage*size*16 + best_vector[stage]*size;
1000             for(j=0; j<size; j++){
1001                 block[stage+1][j] = block[stage][j] - vector[j];
1002             }
1003             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1004             best_vector_score +=
1005                 lambda*(+ 1 + 4*count
1006                         + multistage_vlc[1+count][1]
1007                         + mean_vlc[best_vector_mean][1]);
1008
1009             if(best_vector_score < best_score){
1010                 best_score= best_vector_score;
1011                 best_count= count;
1012                 best_mean= best_vector_mean;
1013             }
1014         }
1015     }
1016
1017     split=0;
1018     if(best_score > threshold && level){
1019         int score=0;
1020         int offset= (level&1) ? stride*h/2 : w/2;
1021         PutBitContext backup[6];
1022
1023         for(i=level-1; i>=0; i--){
1024             backup[i]= s->reorder_pb[i];
1025         }
1026         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1027         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1028         score += lambda;
1029
1030         if(score < best_score){
1031             best_score= score;
1032             split=1;
1033         }else{
1034             for(i=level-1; i>=0; i--){
1035                 s->reorder_pb[i]= backup[i];
1036             }
1037         }
1038     }
1039     if (level > 0)
1040         put_bits(&s->reorder_pb[level], 1, split);
1041
1042     if(!split){
1043         assert((best_mean >= 0 && best_mean<256) || !intra);
1044         assert(best_mean >= -256 && best_mean<256);
1045         assert(best_count >=0 && best_count<7);
1046         assert(level<4 || best_count==0);
1047
1048         /* output the encoding */
1049         put_bits(&s->reorder_pb[level],
1050             multistage_vlc[1 + best_count][1],
1051             multistage_vlc[1 + best_count][0]);
1052         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1053             mean_vlc[best_mean][0]);
1054
1055         for (i = 0; i < best_count; i++){
1056             assert(best_vector[i]>=0 && best_vector[i]<16);
1057             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1058         }
1059
1060         for(y=0; y<h; y++){
1061             for(x=0; x<w; x++){
1062                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1063             }
1064         }
1065     }
1066
1067     return best_score;
1068 }
1069
1070 #ifdef CONFIG_ENCODERS
1071
/**
 * Encode one picture plane into s->pb, one 16x16 block at a time.
 *
 * For P_TYPE pictures a first pass sets up s->m and calls
 * ff_estimate_p_frame_motion() for every block. The second pass then codes
 * each block as intra (candidate 0), inter (candidate 1) or skip
 * (candidate 2), keeps the candidate with the lowest score, appends its bits
 * to s->pb and adds its score to s->rd_total.
 *
 * @param s             encoder context
 * @param plane         plane index, selects s->motion_val8[] / s->motion_val16[]
 * @param src_plane     input pixels, row pitch src_stride
 * @param ref_plane     reference pixels, row pitch stride
 * @param decoded_plane receives the reconstructed pixels, row pitch stride
 * @param width         plane width in pixels
 * @param height        plane height in pixels
 * @param src_stride    row pitch of src_plane
 * @param stride        row pitch of ref_plane, decoded_plane and the local row buffers
 */
1072 static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1073     int width, int height, int src_stride, int stride)
1074 {
1075     int x, y;
1076     int i;
1077     int block_width, block_height;
1078     int level;
1079     int threshold[6];
         /* weight for bit counts in the scores, derived from the squared picture quality */
1080     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1081
1082     /* figure out the acceptable level thresholds in advance */
         /* NOTE(review): threshold[] is filled here but never read in this
          * function; the encode_block() calls below pass the literal 64. */
1083     threshold[5] = QUALITY_THRESHOLD;
1084     for (level = 4; level >= 0; level--)
1085         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1086
         /* number of 16x16 blocks per row / column, rounded up */
1087     block_width = (width + 15) / 16;
1088     block_height = (height + 15) / 16;
1089
         /* P frames, first pass: motion estimation over the whole plane */
1090     if(s->picture.pict_type == P_TYPE){
1091         s->m.avctx= s->avctx;
1092         s->m.current_picture_ptr= &s->m.current_picture;
1093         s->m.last_picture_ptr   = &s->m.last_picture;
1094         s->m.last_picture.data[0]= ref_plane;
1095         s->m.linesize=
1096         s->m.last_picture.linesize[0]=
1097         s->m.new_picture.linesize[0]=
1098         s->m.current_picture.linesize[0]= stride;
1099         s->m.width= width;
1100         s->m.height= height;
1101         s->m.mb_width= block_width;
1102         s->m.mb_height= block_height;
1103         s->m.mb_stride= s->m.mb_width+1;
1104         s->m.b8_stride= 2*s->m.mb_width+1;
1105         s->m.f_code=1;
1106         s->m.pict_type= s->picture.pict_type;
1107         s->m.qscale= s->picture.quality/FF_QP2LAMBDA;
1108         s->m.me_method= s->avctx->me_method;
1109
             /* the per-plane motion vector tables are allocated on first use */
1110         if(!s->motion_val8[plane]){
1111             s->motion_val8 [plane]= av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
1112             s->motion_val16[plane]= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
1113         }
1114
1115         s->m.mb_type= s->mb_type;
1116
1117         //dummies, to avoid segfaults
1118         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1119         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1120         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1121         s->m.current_picture.mb_type= s->dummy;
1122
1123         s->m.current_picture.motion_val[0]= s->motion_val8[plane];
1124         s->m.p_mv_table= s->motion_val16[plane];
1125         s->m.dsp= s->dsp; //move
1126         ff_init_me(&s->m);
1127
1128         s->m.me.dia_size= s->avctx->dia_size;
1129         s->m.first_slice_line=1;
1130         for (y = 0; y < block_height; y++) {
                 /* 16 padded source rows for the current block row */
1131             uint8_t src[stride*16];
1132
                 /* biased so that picture row 16*y maps onto src[0] */
1133             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1134             s->m.mb_y= y;
1135
                 /* copy the available rows and repeat the last pixel of each
                  * row out to the block-aligned width */
1136             for(i=0; i<16 && i + 16*y<height; i++){
1137                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1138                 for(x=width; x<16*block_width; x++)
1139                     src[i*stride+x]= src[i*stride+x-1];
1140             }
                 /* repeat the last available row down to the block-aligned height */
1141             for(; i<16 && i + 16*y<16*block_height; i++)
1142                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1143
1144             for (x = 0; x < block_width; x++) {
1145                 s->m.mb_x= x;
1146                 ff_init_block_index(&s->m);
1147                 ff_update_block_index(&s->m);
1148
                     /* NOTE(review): presumably fills s->m.mb_type and the
                      * motion tables read in the second pass -- confirm */
1149                 ff_estimate_p_frame_motion(&s->m, x, y);
1150             }
1151             s->m.first_slice_line=0;
1152         }
1153
1154         ff_fix_long_p_mvs(&s->m);
1155         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1156     }
1157
         /* second pass: code every 16x16 block */
1158     s->m.first_slice_line=1;
1159     for (y = 0; y < block_height; y++) {
1160         uint8_t src[stride*16];
1161
             /* same row padding as in the motion estimation pass above */
1162         for(i=0; i<16 && i + 16*y<height; i++){
1163             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1164             for(x=width; x<16*block_width; x++)
1165                 src[i*stride+x]= src[i*stride+x-1];
1166         }
1167         for(; i<16 && i + 16*y<16*block_height; i++)
1168             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1169
1170         s->m.mb_y= y;
1171         for (x = 0; x < block_width; x++) {
                 /* first index: candidate (0 intra, 1 inter, 2 skip),
                  * second index: block level 0..5 */
1172             uint8_t reorder_buffer[3][6][7*32];
1173             int count[3][6];
1174             int offset = y * 16 * stride + x * 16;
1175             uint8_t *decoded= decoded_plane + offset;
1176             uint8_t *ref= ref_plane + offset;
1177             int score[4]={0,0,0,0}, best;
                 /* scratch: intra reconstruction at temp, inter prediction at temp+16 */
1178             uint8_t temp[16*stride];
1179
1180             s->m.mb_x= x;
1181             ff_init_block_index(&s->m);
1182             ff_update_block_index(&s->m);
1183
                 /* candidate 0: intra; its reconstruction goes to temp, not to decoded */
1184             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1185                 for(i=0; i<6; i++){
1186                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1187                 }
                     /* the block type code is only written in P frames */
1188                 if(s->picture.pict_type == P_TYPE){
1189                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1190                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1191                     score[0]= vlc[1]*lambda;
1192                 }
1193                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1194                 for(i=0; i<6; i++){
1195                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1196                     flush_put_bits(&s->reorder_pb[i]);
1197                 }
1198             }else
1199                 score[0]= INT_MAX;
1200
1201             best=0;
1202
                 /* candidates 1 (inter) and 2 (skip) exist in P frames only */
1203             if(s->picture.pict_type == P_TYPE){
1204                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1205                 int mx, my, pred_x, pred_y, dxy;
1206                 int16_t *motion_ptr;
1207
1208                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1209                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1210                     for(i=0; i<6; i++)
1211                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1212
1213                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1214
                         /* ff_h263_encode_motion() takes &s->m, so lend it
                          * reorder_pb[5] through s->m.pb and copy the state back */
1215                     s->m.pb= s->reorder_pb[5];
1216                     mx= motion_ptr[0];
1217                     my= motion_ptr[1];
1218                     assert(mx>=-32 && mx<=31);
1219                     assert(my>=-32 && my<=31);
1220                     assert(pred_x>=-32 && pred_x<=31);
1221                     assert(pred_y>=-32 && pred_y<=31);
1222                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1223                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1224                     s->reorder_pb[5]= s->m.pb;
1225                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1226
                         /* low bits of mx/my select the put_pixels variant, the
                          * remaining bits give the whole-pixel displacement
                          * (presumably half-pel vectors -- confirm) */
1227                     dxy= (mx&1) + 2*(my&1);
1228
1229                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1230
1231                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1232                     best= score[1] <= score[0];
1233
                         /* candidate 2: skip, only accepted for a zero motion
                          * vector; its code is written straight to s->pb */
1234                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1235                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1236                     score[2]+= vlc[1]*lambda;
1237                     if(score[2] < score[best] && mx==0 && my==0){
1238                         best=2;
1239                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1240                         for(i=0; i<6; i++){
1241                             count[2][i]=0;
1242                         }
1243                         put_bits(&s->pb, vlc[1], vlc[0]);
1244                     }
1245                 }
1246
1247                 if(best==1){
1248                     for(i=0; i<6; i++){
1249                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1250                         flush_put_bits(&s->reorder_pb[i]);
1251                     }
1252                 }else{
                         /* not coded as inter: zero the eight int16 entries
                          * (two rows of four) stored at motion_ptr */
1253                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1254                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1255                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1256                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1257                 }
1258             }
1259
1260             s->rd_total += score[best];
1261
                 /* append the winning candidate's bits, level 5 first */
1262             for(i=5; i>=0; i--){
1263                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1264             }
                 /* the intra reconstruction was kept in temp; store it now */
1265             if(best==0){
1266                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1267             }
1268         }
1269         s->m.first_slice_line=0;
1270     }
1271 }
1272
1273 static int svq1_encode_init(AVCodecContext *avctx)
1274 {
1275     SVQ1Context * const s = avctx->priv_data;
1276
1277     dsputil_init(&s->dsp, avctx);
1278     avctx->coded_frame= (AVFrame*)&s->picture;
1279
1280     s->frame_width = avctx->width;
1281     s->frame_height = avctx->height;
1282
1283     s->y_block_width = (s->frame_width + 15) / 16;
1284     s->y_block_height = (s->frame_height + 15) / 16;
1285
1286     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1287     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1288
1289     s->avctx= avctx;
1290     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1291     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1292     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1293     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1294     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1295     h263_encode_init(&s->m); //mv_penalty
1296
1297     return 0;
1298 }
1299
1300 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1301     int buf_size, void *data)
1302 {
1303     SVQ1Context * const s = avctx->priv_data;
1304     AVFrame *pict = data;
1305     AVFrame * const p= (AVFrame*)&s->picture;
1306     AVFrame temp;
1307     int i;
1308
1309     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1310         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1311         return -1;
1312     }
1313
1314     if(!s->current_picture.data[0]){
1315         avctx->get_buffer(avctx, &s->current_picture);
1316         avctx->get_buffer(avctx, &s->last_picture);
1317     }
1318
1319     temp= s->current_picture;
1320     s->current_picture= s->last_picture;
1321     s->last_picture= temp;
1322
1323     init_put_bits(&s->pb, buf, buf_size);
1324
1325     *p = *pict;
1326     p->pict_type = avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1327     p->key_frame = p->pict_type == I_TYPE;
1328
1329     svq1_write_header(s, p->pict_type);
1330     for(i=0; i<3; i++){
1331         svq1_encode_plane(s, i,
1332             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1333             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1334             s->picture.linesize[i], s->current_picture.linesize[i]);
1335     }
1336
1337 //    align_put_bits(&s->pb);
1338     while(put_bits_count(&s->pb) & 31)
1339         put_bits(&s->pb, 1, 0);
1340
1341     flush_put_bits(&s->pb);
1342
1343     return (put_bits_count(&s->pb) / 8);
1344 }
1345
1346 static int svq1_encode_end(AVCodecContext *avctx)
1347 {
1348     SVQ1Context * const s = avctx->priv_data;
1349     int i;
1350
1351     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1352
1353     av_freep(&s->m.me.scratchpad);
1354     av_freep(&s->m.me.map);
1355     av_freep(&s->m.me.score_map);
1356     av_freep(&s->mb_type);
1357     av_freep(&s->dummy);
1358
1359     for(i=0; i<3; i++){
1360         av_freep(&s->motion_val8[i]);
1361         av_freep(&s->motion_val16[i]);
1362     }
1363
1364     return 0;
1365 }
1366
1367 #endif //CONFIG_ENCODERS
1368
/**
 * Codec table entry for the SVQ1 decoder.
 * NOTE(review): the leading fields are initialised by position; judging by
 * the values they are presumably name, type, id, private context size, init,
 * encode (NULL for a decoder), close, decode and capabilities -- confirm
 * against the AVCodec declaration.
 */
1369 AVCodec svq1_decoder = {
1370     "svq1",
1371     CODEC_TYPE_VIDEO,
1372     CODEC_ID_SVQ1,
1373     sizeof(MpegEncContext),
1374     svq1_decode_init,
1375     NULL,
1376     svq1_decode_end,
1377     svq1_decode_frame,
1378     CODEC_CAP_DR1,
1379     .flush= ff_mpeg_flush,
         /* the only listed pixel format is YUV410P; the list ends with -1 */
1380     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1381 };
1382
1383 #ifdef CONFIG_ENCODERS
1384
/**
 * Codec table entry for the SVQ1 encoder; only compiled when
 * CONFIG_ENCODERS is defined.
 * NOTE(review): the leading fields are initialised by position; judging by
 * the values they are presumably name, type, id, private context size, init,
 * encode and close -- confirm against the AVCodec declaration.
 */
1385 AVCodec svq1_encoder = {
1386     "svq1",
1387     CODEC_TYPE_VIDEO,
1388     CODEC_ID_SVQ1,
1389     sizeof(SVQ1Context),
1390     svq1_encode_init,
1391     svq1_encode_frame,
1392     svq1_encode_end,
         /* the only listed pixel format is YUV410P; the list ends with -1 */
1393     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1394 };
1395
1396 #endif //CONFIG_ENCODERS