libavcodec/snow.c

   1 /*
   2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "avcodec.h"
  22 #include "dsputil.h"
  23 #include "snow.h"
  24
  25 #include "rangecoder.h"
  26
  27 #include "mpegvideo.h"
  28
  29 #undef NDEBUG
  30 #include <assert.h>
  31
  32 static const int8_t quant3[256]={
  33  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  34  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  40  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
  49 };
  50 static const int8_t quant3b[256]={
  51  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  52  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  53  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  54  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  55  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  57  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  58  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  67 };
  68 static const int8_t quant3bA[256]={
  69  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  70  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  71  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  72  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  73  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  74  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  75  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  76  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  77  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  78  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  79  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  80  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  81  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  82  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  83  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  84  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  85 };
  86 static const int8_t quant5[256]={
  87  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  88  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  89  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  90  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  91  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  92  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  93  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  94  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
 103 };
 104 static const int8_t quant7[256]={
 105  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 106  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 107  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 108  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
 119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
 121 };
 122 static const int8_t quant9[256]={
 123  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 124  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 125  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
 138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
 139 };
 140 static const int8_t quant11[256]={
 141  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 142  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 143  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 144  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
 155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
 157 };
 158 static const int8_t quant13[256]={
 159  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 160  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 161  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 162  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 163  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
 172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
 175 };
 176
 177 #if 0 //64*cubic
 178 static const uint8_t obmc32[1024]={
 179   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 180   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 181   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 182   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 183   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 184   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 185   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 186   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 187   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 188   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 189   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 190   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 191   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 192   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 193   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 194   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 196   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 197   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 198   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 199   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 200   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 201   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 202   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 203   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 204   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 205   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 206   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 207   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 208   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 209   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 210   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 211 //error:0.000022
 212 };
 213 static const uint8_t obmc16[256]={
 214   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 215   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 216   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 217   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 218   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 219   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 220   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 221   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 223   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 224   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 225   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 226   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 227   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 228   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 229   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 230 //error:0.000033
 231 };
 232 #elif 1 // 64*linear
 233 static const uint8_t obmc32[1024]={
 234   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 235   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 236   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 237   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 238   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 239   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 240   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 241   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 242   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 243   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 244   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 245   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 246   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 247   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 248   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 249   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 251   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 252   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 253   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 254   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 255   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 256   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 257   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 258   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 259   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 260   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 261   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 262   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 263   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 264   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 265   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 266  //error:0.000020
 267 };
 268 static const uint8_t obmc16[256]={
 269   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 270   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 271   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 272   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 273   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 274  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 275  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 276  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 278  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 279  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 280   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 281   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 282   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 283   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 284   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 285 //error:0.000015
 286 };
 287 #else //64*cos
 288 static const uint8_t obmc32[1024]={
 289   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 290   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 291   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 292   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 293   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 294   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 295   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 296   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 297   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 298   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 299   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 300   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 301   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 302   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 303   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 304   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 306   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 307   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 308   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 309   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 310   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 311   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 312   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 313   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 314   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 315   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 316   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 317   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 318   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 319   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 320   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 321 //error:0.000022
 322 };
 323 static const uint8_t obmc16[256]={
 324   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 325   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 326   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 327   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 328   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 329   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 330   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 331   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 333   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 334   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 335   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 336   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 337   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 338   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 339   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 340 //error:0.000022
 341 };
 342 #endif
 343
 344 //linear *64
 345 static const uint8_t obmc8[64]={
 346   4, 12, 20, 28, 28, 20, 12,  4,
 347  12, 36, 60, 84, 84, 60, 36, 12,
 348  20, 60,100,140,140,100, 60, 20,
 349  28, 84,140,196,196,140, 84, 28,
 350  28, 84,140,196,196,140, 84, 28,
 351  20, 60,100,140,140,100, 60, 20,
 352  12, 36, 60, 84, 84, 60, 36, 12,
 353   4, 12, 20, 28, 28, 20, 12,  4,
 354 //error:0.000000
 355 };
 356
 357 //linear *64
 358 static const uint8_t obmc4[16]={
 359  16, 48, 48, 16,
 360  48,144,144, 48,
 361  48,144,144, 48,
 362  16, 48, 48, 16,
 363 //error:0.000000
 364 };
 365
 366 static const uint8_t *obmc_tab[4]={
 367     obmc32, obmc16, obmc8, obmc4
 368 };
 369
 370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
 371
 372 typedef struct BlockNode{
 373     int16_t mx;
 374     int16_t my;
 375     uint8_t ref;
 376     uint8_t color[3];
 377     uint8_t type;
 378 //#define TYPE_SPLIT    1
 379 #define BLOCK_INTRA   1
 380 #define BLOCK_OPT     2
 381 //#define TYPE_NOCOLOR  4
 382     uint8_t level; //FIXME merge into type?
 383 }BlockNode;
 384
 385 static const BlockNode null_block= { //FIXME add border maybe
 386     .color= {128,128,128},
 387     .mx= 0,
 388     .my= 0,
 389     .ref= 0,
 390     .type= 0,
 391     .level= 0,
 392 };
 393
 394 #define LOG2_MB_SIZE 4
 395 #define MB_SIZE (1<<LOG2_MB_SIZE)
 396 #define ENCODER_EXTRA_BITS 4
 397 #define HTAPS_MAX 8
 398
 399 typedef struct x_and_coeff{
 400     int16_t x;
 401     uint16_t coeff;
 402 } x_and_coeff;
 403
 404 typedef struct SubBand{
 405     int level;
 406     int stride;
 407     int width;
 408     int height;
 409     int qlog;                                   ///< log(qscale)/log[2^(1/6)]
 410     DWTELEM *buf;
 411     IDWTELEM *ibuf;
 412     int buf_x_offset;
 413     int buf_y_offset;
 414     int stride_line; ///< Stride measured in lines, not pixels.
 415     x_and_coeff * x_coeff;
 416     struct SubBand *parent;
 417     uint8_t state[/*7*2*/ 7 + 512][32];
 418 }SubBand;
 419
 420 typedef struct Plane{
 421     int width;
 422     int height;
 423     SubBand band[MAX_DECOMPOSITIONS][4];
 424
 425     int htaps;
 426     int8_t hcoeff[HTAPS_MAX/2];
 427     int diag_mc;
 428     int fast_mc;
 429
 430     int last_htaps;
 431     int8_t last_hcoeff[HTAPS_MAX/2];
 432     int last_diag_mc;
 433 }Plane;
 434
 435 typedef struct SnowContext{
 436 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 437
 438     AVCodecContext *avctx;
 439     RangeCoder c;
 440     DSPContext dsp;
 441     AVFrame new_picture;
 442     AVFrame input_picture;              ///< new_picture with the internal linesizes
 443     AVFrame current_picture;
 444     AVFrame last_picture[MAX_REF_FRAMES];
 445     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
 446     AVFrame mconly_picture;
 447 //     uint8_t q_context[16];
 448     uint8_t header_state[32];
 449     uint8_t block_state[128 + 32*128];
 450     int keyframe;
 451     int always_reset;
 452     int version;
 453     int spatial_decomposition_type;
 454     int last_spatial_decomposition_type;
 455     int temporal_decomposition_type;
 456     int spatial_decomposition_count;
 457     int temporal_decomposition_count;
 458     int max_ref_frames;
 459     int ref_frames;
 460     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
 461     uint32_t *ref_scores[MAX_REF_FRAMES];
 462     DWTELEM *spatial_dwt_buffer;
 463     IDWTELEM *spatial_idwt_buffer;
 464     int colorspace_type;
 465     int chroma_h_shift;
 466     int chroma_v_shift;
 467     int spatial_scalability;
 468     int qlog;
 469     int last_qlog;
 470     int lambda;
 471     int lambda2;
 472     int pass1_rc;
 473     int mv_scale;
 474     int last_mv_scale;
 475     int qbias;
 476     int last_qbias;
 477 #define QBIAS_SHIFT 3
 478     int b_width;
 479     int b_height;
 480     int block_max_depth;
 481     int last_block_max_depth;
 482     Plane plane[MAX_PLANES];
 483     BlockNode *block;
 484 #define ME_CACHE_SIZE 1024
 485     int me_cache[ME_CACHE_SIZE];
 486     int me_cache_generation;
 487     slice_buffer sb;
 488
 489     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 490 }SnowContext;
 491
 492 typedef struct {
 493     IDWTELEM *b0;
 494     IDWTELEM *b1;
 495     IDWTELEM *b2;
 496     IDWTELEM *b3;
 497     int y;
 498 } dwt_compose_t;
 499
 500 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 501 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 502
 503 static void iterative_me(SnowContext *s);
 504
 505 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
 506 {
 507     int i;
 508
 509     buf->base_buffer = base_buffer;
 510     buf->line_count = line_count;
 511     buf->line_width = line_width;
 512     buf->data_count = max_allocated_lines;
 513     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
 514     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
 515
 516     for (i = 0; i < max_allocated_lines; i++)
 517     {
 518         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
 519     }
 520
 521     buf->data_stack_top = max_allocated_lines - 1;
 522 }
 523
 524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
 525 {
 526     int offset;
 527     IDWTELEM * buffer;
 528
 529 //  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
 530
 531     assert(buf->data_stack_top >= 0);
 532 //  assert(!buf->line[line]);
 533     if (buf->line[line])
 534         return buf->line[line];
 535
 536     offset = buf->line_width * line;
 537     buffer = buf->data_stack[buf->data_stack_top];
 538     buf->data_stack_top--;
 539     buf->line[line] = buffer;
 540
 541 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
 542
 543     return buffer;
 544 }
 545
 546 static void slice_buffer_release(slice_buffer * buf, int line)
 547 {
 548     int offset;
 549     IDWTELEM * buffer;
 550
 551     assert(line >= 0 && line < buf->line_count);
 552     assert(buf->line[line]);
 553
 554     offset = buf->line_width * line;
 555     buffer = buf->line[line];
 556     buf->data_stack_top++;
 557     buf->data_stack[buf->data_stack_top] = buffer;
 558     buf->line[line] = NULL;
 559
 560 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
 561 }
 562
 563 static void slice_buffer_flush(slice_buffer * buf)
 564 {
 565     int i;
 566     for (i = 0; i < buf->line_count; i++)
 567     {
 568         if (buf->line[i])
 569         {
 570 //      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
 571             slice_buffer_release(buf, i);
 572         }
 573     }
 574 }
 575
 576 static void slice_buffer_destroy(slice_buffer * buf)
 577 {
 578     int i;
 579     slice_buffer_flush(buf);
 580
 581     for (i = buf->data_count - 1; i >= 0; i--)
 582     {
 583         av_freep(&buf->data_stack[i]);
 584     }
 585     av_freep(&buf->data_stack);
 586     av_freep(&buf->line);
 587 }
 588
 589 #ifdef __sgi
 590 // Avoid a name clash on SGI IRIX
 591 #undef qexp
 592 #endif
 593 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
 594 static uint8_t qexp[QROOT];
 595
 596 static inline int mirror(int v, int m){
 597     while((unsigned)v > (unsigned)m){
 598         v=-v;
 599         if(v<0) v+= 2*m;
 600     }
 601     return v;
 602 }
 603
 604 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
 605     int i;
 606
 607     if(v){
 608         const int a= FFABS(v);
 609         const int e= av_log2(a);
 610 #if 1
 611         const int el= FFMIN(e, 10);
 612         put_rac(c, state+0, 0);
 613
 614         for(i=0; i<el; i++){
 615             put_rac(c, state+1+i, 1);  //1..10
 616         }
 617         for(; i<e; i++){
 618             put_rac(c, state+1+9, 1);  //1..10
 619         }
 620         put_rac(c, state+1+FFMIN(i,9), 0);
 621
 622         for(i=e-1; i>=el; i--){
 623             put_rac(c, state+22+9, (a>>i)&1); //22..31
 624         }
 625         for(; i>=0; i--){
 626             put_rac(c, state+22+i, (a>>i)&1); //22..31
 627         }
 628
 629         if(is_signed)
 630             put_rac(c, state+11 + el, v < 0); //11..21
 631 #else
 632
 633         put_rac(c, state+0, 0);
 634         if(e<=9){
 635             for(i=0; i<e; i++){
 636                 put_rac(c, state+1+i, 1);  //1..10
 637             }
 638             put_rac(c, state+1+i, 0);
 639
 640             for(i=e-1; i>=0; i--){
 641                 put_rac(c, state+22+i, (a>>i)&1); //22..31
 642             }
 643
 644             if(is_signed)
 645                 put_rac(c, state+11 + e, v < 0); //11..21
 646         }else{
 647             for(i=0; i<e; i++){
 648                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
 649             }
 650             put_rac(c, state+1+FFMIN(i,9), 0);
 651
 652             for(i=e-1; i>=0; i--){
 653                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
 654             }
 655
 656             if(is_signed)
 657                 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
 658         }
 659 #endif
 660     }else{
 661         put_rac(c, state+0, 1);
 662     }
 663 }
 664
 665 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
 666     if(get_rac(c, state+0))
 667         return 0;
 668     else{
 669         int i, e, a;
 670         e= 0;
 671         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
 672             e++;
 673         }
 674
 675         a= 1;
 676         for(i=e-1; i>=0; i--){
 677             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
 678         }
 679
 680         if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
 681             return -a;
 682         else
 683             return a;
 684     }
 685 }
 686
 687 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
 688     int i;
 689     int r= log2>=0 ? 1<<log2 : 1;
 690
 691     assert(v>=0);
 692     assert(log2>=-4);
 693
 694     while(v >= r){
 695         put_rac(c, state+4+log2, 1);
 696         v -= r;
 697         log2++;
 698         if(log2>0) r+=r;
 699     }
 700     put_rac(c, state+4+log2, 0);
 701
 702     for(i=log2-1; i>=0; i--){
 703         put_rac(c, state+31-i, (v>>i)&1);
 704     }
 705 }
 706
 707 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
 708     int i;
 709     int r= log2>=0 ? 1<<log2 : 1;
 710     int v=0;
 711
 712     assert(log2>=-4);
 713
 714     while(get_rac(c, state+4+log2)){
 715         v+= r;
 716         log2++;
 717         if(log2>0) r+=r;
 718     }
 719
 720     for(i=log2-1; i>=0; i--){
 721         v+= get_rac(c, state+31-i)<<i;
 722     }
 723
 724     return v;
 725 }
 726
 727 static av_always_inline void
 728 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 729      int dst_step, int src_step, int ref_step,
 730      int width, int mul, int add, int shift,
 731      int highpass, int inverse){
 732     const int mirror_left= !highpass;
 733     const int mirror_right= (width&1) ^ highpass;
 734     const int w= (width>>1) - 1 + (highpass & width);
 735     int i;
 736
 737 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 738     if(mirror_left){
 739         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 740         dst += dst_step;
 741         src += src_step;
 742     }
 743
 744     for(i=0; i<w; i++){
 745         dst[i*dst_step] =
 746             LIFT(src[i*src_step],
 747                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 748                  inverse);
 749     }
 750
 751     if(mirror_right){
 752         dst[w*dst_step] =
 753             LIFT(src[w*src_step],
 754                  ((mul*2*ref[w*ref_step]+add)>>shift),
 755                  inverse);
 756     }
 757 }
 758
 759 static av_always_inline void
 760 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 761          int dst_step, int src_step, int ref_step,
 762          int width, int mul, int add, int shift,
 763          int highpass, int inverse){
 764     const int mirror_left= !highpass;
 765     const int mirror_right= (width&1) ^ highpass;
 766     const int w= (width>>1) - 1 + (highpass & width);
 767     int i;
 768
 769 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 770     if(mirror_left){
 771         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 772         dst += dst_step;
 773         src += src_step;
 774     }
 775
 776     for(i=0; i<w; i++){
 777         dst[i*dst_step] =
 778             LIFT(src[i*src_step],
 779                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 780                  inverse);
 781     }
 782
 783     if(mirror_right){
 784         dst[w*dst_step] =
 785             LIFT(src[w*src_step],
 786                  ((mul*2*ref[w*ref_step]+add)>>shift),
 787                  inverse);
 788     }
 789 }
 790
 791 #ifndef liftS
 792 static av_always_inline void
 793 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 794       int dst_step, int src_step, int ref_step,
 795       int width, int mul, int add, int shift,
 796       int highpass, int inverse){
 797     const int mirror_left= !highpass;
 798     const int mirror_right= (width&1) ^ highpass;
 799     const int w= (width>>1) - 1 + (highpass & width);
 800     int i;
 801
 802     assert(shift == 4);
 803 #define LIFTS(src, ref, inv) \
 804         ((inv) ? \
 805             (src) + (((ref) + 4*(src))>>shift): \
 806             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 807     if(mirror_left){
 808         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 809         dst += dst_step;
 810         src += src_step;
 811     }
 812
 813     for(i=0; i<w; i++){
 814         dst[i*dst_step] =
 815             LIFTS(src[i*src_step],
 816                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 817                   inverse);
 818     }
 819
 820     if(mirror_right){
 821         dst[w*dst_step] =
 822             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 823     }
 824 }
 825 static av_always_inline void
 826 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 827           int dst_step, int src_step, int ref_step,
 828           int width, int mul, int add, int shift,
 829           int highpass, int inverse){
 830     const int mirror_left= !highpass;
 831     const int mirror_right= (width&1) ^ highpass;
 832     const int w= (width>>1) - 1 + (highpass & width);
 833     int i;
 834
 835     assert(shift == 4);
 836 #define LIFTS(src, ref, inv) \
 837     ((inv) ? \
 838         (src) + (((ref) + 4*(src))>>shift): \
 839         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 840     if(mirror_left){
 841         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 842         dst += dst_step;
 843         src += src_step;
 844     }
 845
 846     for(i=0; i<w; i++){
 847         dst[i*dst_step] =
 848             LIFTS(src[i*src_step],
 849                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 850                   inverse);
 851     }
 852
 853     if(mirror_right){
 854         dst[w*dst_step] =
 855             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 856     }
 857 }
 858 #endif
 859
 860 static void horizontal_decompose53i(DWTELEM *b, int width){
 861     DWTELEM temp[width];
 862     const int width2= width>>1;
 863     int x;
 864     const int w2= (width+1)>>1;
 865
 866     for(x=0; x<width2; x++){
 867         temp[x   ]= b[2*x    ];
 868         temp[x+w2]= b[2*x + 1];
 869     }
 870     if(width&1)
 871         temp[x   ]= b[2*x    ];
 872 #if 0
 873     {
 874     int A1,A2,A3,A4;
 875     A2= temp[1       ];
 876     A4= temp[0       ];
 877     A1= temp[0+width2];
 878     A1 -= (A2 + A4)>>1;
 879     A4 += (A1 + 1)>>1;
 880     b[0+width2] = A1;
 881     b[0       ] = A4;
 882     for(x=1; x+1<width2; x+=2){
 883         A3= temp[x+width2];
 884         A4= temp[x+1     ];
 885         A3 -= (A2 + A4)>>1;
 886         A2 += (A1 + A3 + 2)>>2;
 887         b[x+width2] = A3;
 888         b[x       ] = A2;
 889
 890         A1= temp[x+1+width2];
 891         A2= temp[x+2       ];
 892         A1 -= (A2 + A4)>>1;
 893         A4 += (A1 + A3 + 2)>>2;
 894         b[x+1+width2] = A1;
 895         b[x+1       ] = A4;
 896     }
 897     A3= temp[width-1];
 898     A3 -= A2;
 899     A2 += (A1 + A3 + 2)>>2;
 900     b[width -1] = A3;
 901     b[width2-1] = A2;
 902     }
 903 #else
 904     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
 905     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
 906 #endif
 907 }
 908
 909 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 910     int i;
 911
 912     for(i=0; i<width; i++){
 913         b1[i] -= (b0[i] + b2[i])>>1;
 914     }
 915 }
 916
 917 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 918     int i;
 919
 920     for(i=0; i<width; i++){
 921         b1[i] += (b0[i] + b2[i] + 2)>>2;
 922     }
 923 }
 924
 925 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
 926     int y;
 927     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
 928     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
 929
 930     for(y=-2; y<height; y+=2){
 931         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
 932         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 933
 934 {START_TIMER
 935         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
 936         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
 937 STOP_TIMER("horizontal_decompose53i")}
 938
 939 {START_TIMER
 940         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
 941         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
 942 STOP_TIMER("vertical_decompose53i*")}
 943
 944         b0=b2;
 945         b1=b3;
 946     }
 947 }
 948
 949 static void horizontal_decompose97i(DWTELEM *b, int width){
 950     DWTELEM temp[width];
 951     const int w2= (width+1)>>1;
 952
 953     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
 954     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
 955     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
 956     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
 957 }
 958
 959
 960 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 961     int i;
 962
 963     for(i=0; i<width; i++){
 964         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
 965     }
 966 }
 967
 968 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 969     int i;
 970
 971     for(i=0; i<width; i++){
 972         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
 973     }
 974 }
 975
 976 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 977     int i;
 978
 979     for(i=0; i<width; i++){
 980 #ifdef liftS
 981         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
 982 #else
 983         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
 984 #endif
 985     }
 986 }
 987
 988 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 989     int i;
 990
 991     for(i=0; i<width; i++){
 992         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
 993     }
 994 }
 995
 996 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
 997     int y;
 998     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
 999     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1000     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1001     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1002
1003     for(y=-4; y<height; y+=2){
1004         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1005         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1006
1007 {START_TIMER
1008         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1009         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1010 if(width>400){
1011 STOP_TIMER("horizontal_decompose97i")
1012 }}
1013
1014 {START_TIMER
1015         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1016         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1017         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1018         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1019
1020 if(width>400){
1021 STOP_TIMER("vertical_decompose97i")
1022 }}
1023
1024         b0=b2;
1025         b1=b3;
1026         b2=b4;
1027         b3=b5;
1028     }
1029 }
1030
1031 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1032     int level;
1033
1034     for(level=0; level<decomposition_count; level++){
1035         switch(type){
1036         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1037         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1038         }
1039     }
1040 }
1041
1042 static void horizontal_compose53i(IDWTELEM *b, int width){
1043     IDWTELEM temp[width];
1044     const int width2= width>>1;
1045     const int w2= (width+1)>>1;
1046     int x;
1047
1048 #if 0
1049     int A1,A2,A3,A4;
1050     A2= temp[1       ];
1051     A4= temp[0       ];
1052     A1= temp[0+width2];
1053     A1 -= (A2 + A4)>>1;
1054     A4 += (A1 + 1)>>1;
1055     b[0+width2] = A1;
1056     b[0       ] = A4;
1057     for(x=1; x+1<width2; x+=2){
1058         A3= temp[x+width2];
1059         A4= temp[x+1     ];
1060         A3 -= (A2 + A4)>>1;
1061         A2 += (A1 + A3 + 2)>>2;
1062         b[x+width2] = A3;
1063         b[x       ] = A2;
1064
1065         A1= temp[x+1+width2];
1066         A2= temp[x+2       ];
1067         A1 -= (A2 + A4)>>1;
1068         A4 += (A1 + A3 + 2)>>2;
1069         b[x+1+width2] = A1;
1070         b[x+1       ] = A4;
1071     }
1072     A3= temp[width-1];
1073     A3 -= A2;
1074     A2 += (A1 + A3 + 2)>>2;
1075     b[width -1] = A3;
1076     b[width2-1] = A2;
1077 #else
1078     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1079     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1080 #endif
1081     for(x=0; x<width2; x++){
1082         b[2*x    ]= temp[x   ];
1083         b[2*x + 1]= temp[x+w2];
1084     }
1085     if(width&1)
1086         b[2*x    ]= temp[x   ];
1087 }
1088
1089 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1090     int i;
1091
1092     for(i=0; i<width; i++){
1093         b1[i] += (b0[i] + b2[i])>>1;
1094     }
1095 }
1096
1097 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1098     int i;
1099
1100     for(i=0; i<width; i++){
1101         b1[i] -= (b0[i] + b2[i] + 2)>>2;
1102     }
1103 }
1104
1105 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1106     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1107     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1108     cs->y = -1;
1109 }
1110
1111 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1112     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1113     cs->b1 = buffer + mirror(-1  , height-1)*stride;
1114     cs->y = -1;
1115 }
1116
1117 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1118     int y= cs->y;
1119
1120     IDWTELEM *b0= cs->b0;
1121     IDWTELEM *b1= cs->b1;
1122     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1123     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1124
1125 {START_TIMER
1126         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1127         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1128 STOP_TIMER("vertical_compose53i*")}
1129
1130 {START_TIMER
1131         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1132         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1133 STOP_TIMER("horizontal_compose53i")}
1134
1135     cs->b0 = b2;
1136     cs->b1 = b3;
1137     cs->y += 2;
1138 }
1139
1140 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1141     int y= cs->y;
1142     IDWTELEM *b0= cs->b0;
1143     IDWTELEM *b1= cs->b1;
1144     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1145     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1146
1147 {START_TIMER
1148         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1149         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1150 STOP_TIMER("vertical_compose53i*")}
1151
1152 {START_TIMER
1153         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1154         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1155 STOP_TIMER("horizontal_compose53i")}
1156
1157     cs->b0 = b2;
1158     cs->b1 = b3;
1159     cs->y += 2;
1160 }
1161
1162 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1163     dwt_compose_t cs;
1164     spatial_compose53i_init(&cs, buffer, height, stride);
1165     while(cs.y <= height)
1166         spatial_compose53i_dy(&cs, buffer, width, height, stride);
1167 }
1168
1169
1170 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1171     IDWTELEM temp[width];
1172     const int w2= (width+1)>>1;
1173
1174     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1175     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1176     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1177     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1178 }
1179
1180 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1181     int i;
1182
1183     for(i=0; i<width; i++){
1184         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1185     }
1186 }
1187
1188 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1189     int i;
1190
1191     for(i=0; i<width; i++){
1192         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1193     }
1194 }
1195
1196 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1197     int i;
1198
1199     for(i=0; i<width; i++){
1200 #ifdef liftS
1201         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1202 #else
1203         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1204 #endif
1205     }
1206 }
1207
1208 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1209     int i;
1210
1211     for(i=0; i<width; i++){
1212         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1213     }
1214 }
1215
1216 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1217     int i;
1218
1219     for(i=0; i<width; i++){
1220         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1221         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1222 #ifdef liftS
1223         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1224 #else
1225         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1226 #endif
1227         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1228     }
1229 }
1230
1231 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1232     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1233     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1234     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1235     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1236     cs->y = -3;
1237 }
1238
1239 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1240     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1241     cs->b1 = buffer + mirror(-3  , height-1)*stride;
1242     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1243     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1244     cs->y = -3;
1245 }
1246
1247 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1248     int y = cs->y;
1249
1250     IDWTELEM *b0= cs->b0;
1251     IDWTELEM *b1= cs->b1;
1252     IDWTELEM *b2= cs->b2;
1253     IDWTELEM *b3= cs->b3;
1254     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1255     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1256
1257 {START_TIMER
1258     if(y>0 && y+4<height){
1259         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1260     }else{
1261         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1262         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1263         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1264         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1265     }
1266 if(width>400){
1267 STOP_TIMER("vertical_compose97i")}}
1268
1269 {START_TIMER
1270         if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1271         if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1272 if(width>400 && y+0<(unsigned)height){
1273 STOP_TIMER("horizontal_compose97i")}}
1274
1275     cs->b0=b2;
1276     cs->b1=b3;
1277     cs->b2=b4;
1278     cs->b3=b5;
1279     cs->y += 2;
1280 }
1281
1282 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1283     int y = cs->y;
1284     IDWTELEM *b0= cs->b0;
1285     IDWTELEM *b1= cs->b1;
1286     IDWTELEM *b2= cs->b2;
1287     IDWTELEM *b3= cs->b3;
1288     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1289     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1290
1291 {START_TIMER
1292         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1293         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1294         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1295         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1296 if(width>400){
1297 STOP_TIMER("vertical_compose97i")}}
1298
1299 {START_TIMER
1300         if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1301         if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1302 if(width>400 && b0 <= b2){
1303 STOP_TIMER("horizontal_compose97i")}}
1304
1305     cs->b0=b2;
1306     cs->b1=b3;
1307     cs->b2=b4;
1308     cs->b3=b5;
1309     cs->y += 2;
1310 }
1311
1312 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1313     dwt_compose_t cs;
1314     spatial_compose97i_init(&cs, buffer, height, stride);
1315     while(cs.y <= height)
1316         spatial_compose97i_dy(&cs, buffer, width, height, stride);
1317 }
1318
1319 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1320     int level;
1321     for(level=decomposition_count-1; level>=0; level--){
1322         switch(type){
1323         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1324         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325         }
1326     }
1327 }
1328
1329 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1330     int level;
1331     for(level=decomposition_count-1; level>=0; level--){
1332         switch(type){
1333         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1334         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1335         }
1336     }
1337 }
1338
1339 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1340     const int support = type==1 ? 3 : 5;
1341     int level;
1342     if(type==2) return;
1343
1344     for(level=decomposition_count-1; level>=0; level--){
1345         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1346             switch(type){
1347             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1348                     break;
1349             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1350                     break;
1351             }
1352         }
1353     }
1354 }
1355
1356 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1357     const int support = type==1 ? 3 : 5;
1358     int level;
1359     if(type==2) return;
1360
1361     for(level=decomposition_count-1; level>=0; level--){
1362         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1363             switch(type){
1364             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1365                     break;
1366             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1367                     break;
1368             }
1369         }
1370     }
1371 }
1372
1373 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1374         dwt_compose_t cs[MAX_DECOMPOSITIONS];
1375         int y;
1376         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1377         for(y=0; y<height; y+=4)
1378             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1379 }
1380
1381 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1382     const int w= b->width;
1383     const int h= b->height;
1384     int x, y;
1385
1386     if(1){
1387         int run=0;
1388         int runs[w*h];
1389         int run_index=0;
1390         int max_index;
1391
1392         for(y=0; y<h; y++){
1393             for(x=0; x<w; x++){
1394                 int v, p=0;
1395                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1396                 v= src[x + y*stride];
1397
1398                 if(y){
1399                     t= src[x + (y-1)*stride];
1400                     if(x){
1401                         lt= src[x - 1 + (y-1)*stride];
1402                     }
1403                     if(x + 1 < w){
1404                         rt= src[x + 1 + (y-1)*stride];
1405                     }
1406                 }
1407                 if(x){
1408                     l= src[x - 1 + y*stride];
1409                     /*if(x > 1){
1410                         if(orientation==1) ll= src[y + (x-2)*stride];
1411                         else               ll= src[x - 2 + y*stride];
1412                     }*/
1413                 }
1414                 if(parent){
1415                     int px= x>>1;
1416                     int py= y>>1;
1417                     if(px<b->parent->width && py<b->parent->height)
1418                         p= parent[px + py*2*stride];
1419                 }
1420                 if(!(/*ll|*/l|lt|t|rt|p)){
1421                     if(v){
1422                         runs[run_index++]= run;
1423                         run=0;
1424                     }else{
1425                         run++;
1426                     }
1427                 }
1428             }
1429         }
1430         max_index= run_index;
1431         runs[run_index++]= run;
1432         run_index=0;
1433         run= runs[run_index++];
1434
1435         put_symbol2(&s->c, b->state[30], max_index, 0);
1436         if(run_index <= max_index)
1437             put_symbol2(&s->c, b->state[1], run, 3);
1438
1439         for(y=0; y<h; y++){
1440             if(s->c.bytestream_end - s->c.bytestream < w*40){
1441                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1442                 return -1;
1443             }
1444             for(x=0; x<w; x++){
1445                 int v, p=0;
1446                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1447                 v= src[x + y*stride];
1448
1449                 if(y){
1450                     t= src[x + (y-1)*stride];
1451                     if(x){
1452                         lt= src[x - 1 + (y-1)*stride];
1453                     }
1454                     if(x + 1 < w){
1455                         rt= src[x + 1 + (y-1)*stride];
1456                     }
1457                 }
1458                 if(x){
1459                     l= src[x - 1 + y*stride];
1460                     /*if(x > 1){
1461                         if(orientation==1) ll= src[y + (x-2)*stride];
1462                         else               ll= src[x - 2 + y*stride];
1463                     }*/
1464                 }
1465                 if(parent){
1466                     int px= x>>1;
1467                     int py= y>>1;
1468                     if(px<b->parent->width && py<b->parent->height)
1469                         p= parent[px + py*2*stride];
1470                 }
1471                 if(/*ll|*/l|lt|t|rt|p){
1472                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1473
1474                     put_rac(&s->c, &b->state[0][context], !!v);
1475                 }else{
1476                     if(!run){
1477                         run= runs[run_index++];
1478
1479                         if(run_index <= max_index)
1480                             put_symbol2(&s->c, b->state[1], run, 3);
1481                         assert(v);
1482                     }else{
1483                         run--;
1484                         assert(!v);
1485                     }
1486                 }
1487                 if(v){
1488                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1489                     int l2= 2*FFABS(l) + (l<0);
1490                     int t2= 2*FFABS(t) + (t<0);
1491
1492                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1493                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1494                 }
1495             }
1496         }
1497     }
1498     return 0;
1499 }
1500
1501 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1502 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
1503 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
1504     return encode_subband_c0run(s, b, src, parent, stride, orientation);
1505 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
1506 }
1507
1508 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1509     const int w= b->width;
1510     const int h= b->height;
1511     int x,y;
1512
1513     if(1){
1514         int run, runs;
1515         x_and_coeff *xc= b->x_coeff;
1516         x_and_coeff *prev_xc= NULL;
1517         x_and_coeff *prev2_xc= xc;
1518         x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1519         x_and_coeff *prev_parent_xc= parent_xc;
1520
1521         runs= get_symbol2(&s->c, b->state[30], 0);
1522         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1523         else           run= INT_MAX;
1524
1525         for(y=0; y<h; y++){
1526             int v=0;
1527             int lt=0, t=0, rt=0;
1528
1529             if(y && prev_xc->x == 0){
1530                 rt= prev_xc->coeff;
1531             }
1532             for(x=0; x<w; x++){
1533                 int p=0;
1534                 const int l= v;
1535
1536                 lt= t; t= rt;
1537
1538                 if(y){
1539                     if(prev_xc->x <= x)
1540                         prev_xc++;
1541                     if(prev_xc->x == x + 1)
1542                         rt= prev_xc->coeff;
1543                     else
1544                         rt=0;
1545                 }
1546                 if(parent_xc){
1547                     if(x>>1 > parent_xc->x){
1548                         parent_xc++;
1549                     }
1550                     if(x>>1 == parent_xc->x){
1551                         p= parent_xc->coeff;
1552                     }
1553                 }
1554                 if(/*ll|*/l|lt|t|rt|p){
1555                     int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1556
1557                     v=get_rac(&s->c, &b->state[0][context]);
1558                     if(v){
1559                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1560                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1561
1562                         xc->x=x;
1563                         (xc++)->coeff= v;
1564                     }
1565                 }else{
1566                     if(!run){
1567                         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1568                         else           run= INT_MAX;
1569                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1570                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1571
1572                         xc->x=x;
1573                         (xc++)->coeff= v;
1574                     }else{
1575                         int max_run;
1576                         run--;
1577                         v=0;
1578
1579                         if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1580                         else  max_run= FFMIN(run, w-x-1);
1581                         if(parent_xc)
1582                             max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1583                         x+= max_run;
1584                         run-= max_run;
1585                     }
1586                 }
1587             }
1588             (xc++)->x= w+1; //end marker
1589             prev_xc= prev2_xc;
1590             prev2_xc= xc;
1591
1592             if(parent_xc){
1593                 if(y&1){
1594                     while(parent_xc->x != parent->width+1)
1595                         parent_xc++;
1596                     parent_xc++;
1597                     prev_parent_xc= parent_xc;
1598                 }else{
1599                     parent_xc= prev_parent_xc;
1600                 }
1601             }
1602         }
1603
1604         (xc++)->x= w+1; //end marker
1605     }
1606 }
1607
1608 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1609     const int w= b->width;
1610     int y;
1611     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1612     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1613     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1614     int new_index = 0;
1615
1616     START_TIMER
1617
1618     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1619         qadd= 0;
1620         qmul= 1<<QEXPSHIFT;
1621     }
1622
1623     /* If we are on the second or later slice, restore our index. */
1624     if (start_y != 0)
1625         new_index = save_state[0];
1626
1627
1628     for(y=start_y; y<h; y++){
1629         int x = 0;
1630         int v;
1631         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1632         memset(line, 0, b->width*sizeof(IDWTELEM));
1633         v = b->x_coeff[new_index].coeff;
1634         x = b->x_coeff[new_index++].x;
1635         while(x < w)
1636         {
1637             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1638             register int u= -(v&1);
1639             line[x] = (t^u) - u;
1640
1641             v = b->x_coeff[new_index].coeff;
1642             x = b->x_coeff[new_index++].x;
1643         }
1644     }
1645     if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1646         STOP_TIMER("decode_subband")
1647     }
1648
1649     /* Save our variables for the next slice. */
1650     save_state[0] = new_index;
1651
1652     return;
1653 }
1654
1655 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1656     int plane_index, level, orientation;
1657
1658     for(plane_index=0; plane_index<3; plane_index++){
1659         for(level=0; level<MAX_DECOMPOSITIONS; level++){
1660             for(orientation=level ? 1:0; orientation<4; orientation++){
1661                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1662             }
1663         }
1664     }
1665     memset(s->header_state, MID_STATE, sizeof(s->header_state));
1666     memset(s->block_state, MID_STATE, sizeof(s->block_state));
1667 }
1668
1669 static int alloc_blocks(SnowContext *s){
1670     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1671     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1672
1673     s->b_width = w;
1674     s->b_height= h;
1675
1676     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1677     return 0;
1678 }
1679
1680 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1681     uint8_t *bytestream= d->bytestream;
1682     uint8_t *bytestream_start= d->bytestream_start;
1683     *d= *s;
1684     d->bytestream= bytestream;
1685     d->bytestream_start= bytestream_start;
1686 }
1687
1688 //near copy & paste from dsputil, FIXME
1689 static int pix_sum(uint8_t * pix, int line_size, int w)
1690 {
1691     int s, i, j;
1692
1693     s = 0;
1694     for (i = 0; i < w; i++) {
1695         for (j = 0; j < w; j++) {
1696             s += pix[0];
1697             pix ++;
1698         }
1699         pix += line_size - w;
1700     }
1701     return s;
1702 }
1703
1704 //near copy & paste from dsputil, FIXME
1705 static int pix_norm1(uint8_t * pix, int line_size, int w)
1706 {
1707     int s, i, j;
1708     uint32_t *sq = ff_squareTbl + 256;
1709
1710     s = 0;
1711     for (i = 0; i < w; i++) {
1712         for (j = 0; j < w; j ++) {
1713             s += sq[pix[0]];
1714             pix ++;
1715         }
1716         pix += line_size - w;
1717     }
1718     return s;
1719 }
1720
1721 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1722     const int w= s->b_width << s->block_max_depth;
1723     const int rem_depth= s->block_max_depth - level;
1724     const int index= (x + y*w) << rem_depth;
1725     const int block_w= 1<<rem_depth;
1726     BlockNode block;
1727     int i,j;
1728
1729     block.color[0]= l;
1730     block.color[1]= cb;
1731     block.color[2]= cr;
1732     block.mx= mx;
1733     block.my= my;
1734     block.ref= ref;
1735     block.type= type;
1736     block.level= level;
1737
1738     for(j=0; j<block_w; j++){
1739         for(i=0; i<block_w; i++){
1740             s->block[index + i + j*w]= block;
1741         }
1742     }
1743 }
1744
1745 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1746     const int offset[3]= {
1747           y*c->  stride + x,
1748         ((y*c->uvstride + x)>>1),
1749         ((y*c->uvstride + x)>>1),
1750     };
1751     int i;
1752     for(i=0; i<3; i++){
1753         c->src[0][i]= src [i];
1754         c->ref[0][i]= ref [i] + offset[i];
1755     }
1756     assert(!ref_index);
1757 }
1758
1759 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1760                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1761     if(s->ref_frames == 1){
1762         *mx = mid_pred(left->mx, top->mx, tr->mx);
1763         *my = mid_pred(left->my, top->my, tr->my);
1764     }else{
1765         const int *scale = scale_mv_ref[ref];
1766         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1767                        (top ->mx * scale[top ->ref] + 128) >>8,
1768                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1769         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1770                        (top ->my * scale[top ->ref] + 128) >>8,
1771                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
1772     }
1773 }
1774
1775 //FIXME copy&paste
1776 #define P_LEFT P[1]
1777 #define P_TOP P[2]
1778 #define P_TOPRIGHT P[3]
1779 #define P_MEDIAN P[4]
1780 #define P_MV1 P[9]
1781 #define FLAG_QPEL   1 //must be 1
1782
1783 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1784     uint8_t p_buffer[1024];
1785     uint8_t i_buffer[1024];
1786     uint8_t p_state[sizeof(s->block_state)];
1787     uint8_t i_state[sizeof(s->block_state)];
1788     RangeCoder pc, ic;
1789     uint8_t *pbbak= s->c.bytestream;
1790     uint8_t *pbbak_start= s->c.bytestream_start;
1791     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1792     const int w= s->b_width  << s->block_max_depth;
1793     const int h= s->b_height << s->block_max_depth;
1794     const int rem_depth= s->block_max_depth - level;
1795     const int index= (x + y*w) << rem_depth;
1796     const int block_w= 1<<(LOG2_MB_SIZE - level);
1797     int trx= (x+1)<<rem_depth;
1798     int try= (y+1)<<rem_depth;
1799     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1800     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1801     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1802     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1803     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1804     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1805     int pl = left->color[0];
1806     int pcb= left->color[1];
1807     int pcr= left->color[2];
1808     int pmx, pmy;
1809     int mx=0, my=0;
1810     int l,cr,cb;
1811     const int stride= s->current_picture.linesize[0];
1812     const int uvstride= s->current_picture.linesize[1];
1813     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1814                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1815                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1816     int P[10][2];
1817     int16_t last_mv[3][2];
1818     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1819     const int shift= 1+qpel;
1820     MotionEstContext *c= &s->m.me;
1821     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1822     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1823     int my_context= av_log2(2*FFABS(left->my - top->my));
1824     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1825     int ref, best_ref, ref_score, ref_mx, ref_my;
1826
1827     assert(sizeof(s->block_state) >= 256);
1828     if(s->keyframe){
1829         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1830         return 0;
1831     }
1832
1833 //    clip predictors / edge ?
1834
1835     P_LEFT[0]= left->mx;
1836     P_LEFT[1]= left->my;
1837     P_TOP [0]= top->mx;
1838     P_TOP [1]= top->my;
1839     P_TOPRIGHT[0]= tr->mx;
1840     P_TOPRIGHT[1]= tr->my;
1841
1842     last_mv[0][0]= s->block[index].mx;
1843     last_mv[0][1]= s->block[index].my;
1844     last_mv[1][0]= right->mx;
1845     last_mv[1][1]= right->my;
1846     last_mv[2][0]= bottom->mx;
1847     last_mv[2][1]= bottom->my;
1848
1849     s->m.mb_stride=2;
1850     s->m.mb_x=
1851     s->m.mb_y= 0;
1852     c->skip= 0;
1853
1854     assert(c->  stride ==   stride);
1855     assert(c->uvstride == uvstride);
1856
1857     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1858     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1859     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1860     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1861
1862     c->xmin = - x*block_w - 16+2;
1863     c->ymin = - y*block_w - 16+2;
1864     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1865     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866
1867     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1868     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1869     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1870     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1871     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1872     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1873     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1874
1875     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1876     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1877
1878     if (!y) {
1879         c->pred_x= P_LEFT[0];
1880         c->pred_y= P_LEFT[1];
1881     } else {
1882         c->pred_x = P_MEDIAN[0];
1883         c->pred_y = P_MEDIAN[1];
1884     }
1885
1886     score= INT_MAX;
1887     best_ref= 0;
1888     for(ref=0; ref<s->ref_frames; ref++){
1889         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1890
1891         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1892                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1893
1894         assert(ref_mx >= c->xmin);
1895         assert(ref_mx <= c->xmax);
1896         assert(ref_my >= c->ymin);
1897         assert(ref_my <= c->ymax);
1898
1899         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1900         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1901         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1902         if(s->ref_mvs[ref]){
1903             s->ref_mvs[ref][index][0]= ref_mx;
1904             s->ref_mvs[ref][index][1]= ref_my;
1905             s->ref_scores[ref][index]= ref_score;
1906         }
1907         if(score > ref_score){
1908             score= ref_score;
1909             best_ref= ref;
1910             mx= ref_mx;
1911             my= ref_my;
1912         }
1913     }
1914     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1915
1916   //  subpel search
1917     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1918     pc= s->c;
1919     pc.bytestream_start=
1920     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1921     memcpy(p_state, s->block_state, sizeof(s->block_state));
1922
1923     if(level!=s->block_max_depth)
1924         put_rac(&pc, &p_state[4 + s_context], 1);
1925     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1926     if(s->ref_frames > 1)
1927         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1928     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1929     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1930     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1931     p_len= pc.bytestream - pc.bytestream_start;
1932     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1933
1934     block_s= block_w*block_w;
1935     sum = pix_sum(current_data[0], stride, block_w);
1936     l= (sum + block_s/2)/block_s;
1937     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1938
1939     block_s= block_w*block_w>>2;
1940     sum = pix_sum(current_data[1], uvstride, block_w>>1);
1941     cb= (sum + block_s/2)/block_s;
1942 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1943     sum = pix_sum(current_data[2], uvstride, block_w>>1);
1944     cr= (sum + block_s/2)/block_s;
1945 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1946
1947     ic= s->c;
1948     ic.bytestream_start=
1949     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1950     memcpy(i_state, s->block_state, sizeof(s->block_state));
1951     if(level!=s->block_max_depth)
1952         put_rac(&ic, &i_state[4 + s_context], 1);
1953     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1954     put_symbol(&ic, &i_state[32],  l-pl , 1);
1955     put_symbol(&ic, &i_state[64], cb-pcb, 1);
1956     put_symbol(&ic, &i_state[96], cr-pcr, 1);
1957     i_len= ic.bytestream - ic.bytestream_start;
1958     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1959
1960 //    assert(score==256*256*256*64-1);
1961     assert(iscore < 255*255*256 + s->lambda2*10);
1962     assert(iscore >= 0);
1963     assert(l>=0 && l<=255);
1964     assert(pl>=0 && pl<=255);
1965
1966     if(level==0){
1967         int varc= iscore >> 8;
1968         int vard= score >> 8;
1969         if (vard <= 64 || vard < varc)
1970             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1971         else
1972             c->scene_change_score+= s->m.qscale;
1973     }
1974
1975     if(level!=s->block_max_depth){
1976         put_rac(&s->c, &s->block_state[4 + s_context], 0);
1977         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1978         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1979         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1980         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1981         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1982
1983         if(score2 < score && score2 < iscore)
1984             return score2;
1985     }
1986
1987     if(iscore < score){
1988         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1989         memcpy(pbbak, i_buffer, i_len);
1990         s->c= ic;
1991         s->c.bytestream_start= pbbak_start;
1992         s->c.bytestream= pbbak + i_len;
1993         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1994         memcpy(s->block_state, i_state, sizeof(s->block_state));
1995         return iscore;
1996     }else{
1997         memcpy(pbbak, p_buffer, p_len);
1998         s->c= pc;
1999         s->c.bytestream_start= pbbak_start;
2000         s->c.bytestream= pbbak + p_len;
2001         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2002         memcpy(s->block_state, p_state, sizeof(s->block_state));
2003         return score;
2004     }
2005 }
2006
2007 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2008     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2009         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2010     }else{
2011         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2012     }
2013 }
2014
2015 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2016     const int w= s->b_width  << s->block_max_depth;
2017     const int rem_depth= s->block_max_depth - level;
2018     const int index= (x + y*w) << rem_depth;
2019     int trx= (x+1)<<rem_depth;
2020     BlockNode *b= &s->block[index];
2021     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2022     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2023     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2024     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2025     int pl = left->color[0];
2026     int pcb= left->color[1];
2027     int pcr= left->color[2];
2028     int pmx, pmy;
2029     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2030     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2031     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2032     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033
2034     if(s->keyframe){
2035         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2036         return;
2037     }
2038
2039     if(level!=s->block_max_depth){
2040         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2041             put_rac(&s->c, &s->block_state[4 + s_context], 1);
2042         }else{
2043             put_rac(&s->c, &s->block_state[4 + s_context], 0);
2044             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2045             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2046             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2047             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2048             return;
2049         }
2050     }
2051     if(b->type & BLOCK_INTRA){
2052         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2053         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2054         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2055         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2056         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2057         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2058     }else{
2059         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2060         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2061         if(s->ref_frames > 1)
2062             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2063         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2064         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2065         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2066     }
2067 }
2068
2069 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2070     const int w= s->b_width << s->block_max_depth;
2071     const int rem_depth= s->block_max_depth - level;
2072     const int index= (x + y*w) << rem_depth;
2073     int trx= (x+1)<<rem_depth;
2074     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2075     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2076     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2077     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2078     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2079
2080     if(s->keyframe){
2081         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2082         return;
2083     }
2084
2085     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2086         int type, mx, my;
2087         int l = left->color[0];
2088         int cb= left->color[1];
2089         int cr= left->color[2];
2090         int ref = 0;
2091         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2092         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2093         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2094
2095         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2096
2097         if(type){
2098             pred_mv(s, &mx, &my, 0, left, top, tr);
2099             l += get_symbol(&s->c, &s->block_state[32], 1);
2100             cb+= get_symbol(&s->c, &s->block_state[64], 1);
2101             cr+= get_symbol(&s->c, &s->block_state[96], 1);
2102         }else{
2103             if(s->ref_frames > 1)
2104                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2105             pred_mv(s, &mx, &my, ref, left, top, tr);
2106             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2107             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2108         }
2109         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2110     }else{
2111         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2112         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2113         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2114         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2115     }
2116 }
2117
2118 static void encode_blocks(SnowContext *s, int search){
2119     int x, y;
2120     int w= s->b_width;
2121     int h= s->b_height;
2122
2123     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2124         iterative_me(s);
2125
2126     for(y=0; y<h; y++){
2127         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2128             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2129             return;
2130         }
2131         for(x=0; x<w; x++){
2132             if(s->avctx->me_method == ME_ITER || !search)
2133                 encode_q_branch2(s, 0, x, y);
2134             else
2135                 encode_q_branch (s, 0, x, y);
2136         }
2137     }
2138 }
2139
2140 static void decode_blocks(SnowContext *s){
2141     int x, y;
2142     int w= s->b_width;
2143     int h= s->b_height;
2144
2145     for(y=0; y<h; y++){
2146         for(x=0; x<w; x++){
2147             decode_q_branch(s, 0, x, y);
2148         }
2149     }
2150 }
2151
2152 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2153     const static uint8_t weight[64]={
2154     8,7,6,5,4,3,2,1,
2155     7,7,0,0,0,0,0,1,
2156     6,0,6,0,0,0,2,0,
2157     5,0,0,5,0,3,0,0,
2158     4,0,0,0,4,0,0,0,
2159     3,0,0,5,0,3,0,0,
2160     2,0,6,0,0,0,2,0,
2161     1,7,0,0,0,0,0,1,
2162     };
2163
2164     const static uint8_t brane[256]={
2165     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2166     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2167     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2168     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2169     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2170     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2171     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2172     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2173     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2174     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2175     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2176     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2177     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2178     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2179     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2180     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2181     };
2182
2183     const static uint8_t needs[16]={
2184     0,1,0,0,
2185     2,4,2,0,
2186     0,1,0,0,
2187     15
2188     };
2189
2190     int x, y, b, r, l;
2191     int16_t tmpIt   [64*(32+HTAPS_MAX)];
2192     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2193     int16_t *tmpI= tmpIt;
2194     uint8_t *tmp2= tmp2t[0];
2195     uint8_t *hpel[11];
2196 START_TIMER
2197     assert(dx<16 && dy<16);
2198     r= brane[dx + 16*dy]&15;
2199     l= brane[dx + 16*dy]>>4;
2200
2201     b= needs[l] | needs[r];
2202     if(p && !p->diag_mc)
2203         b= 15;
2204
2205     if(b&5){
2206         for(y=0; y < b_h+HTAPS_MAX-1; y++){
2207             for(x=0; x < b_w; x++){
2208                 int a_1=src[x + HTAPS_MAX/2-4];
2209                 int a0= src[x + HTAPS_MAX/2-3];
2210                 int a1= src[x + HTAPS_MAX/2-2];
2211                 int a2= src[x + HTAPS_MAX/2-1];
2212                 int a3= src[x + HTAPS_MAX/2+0];
2213                 int a4= src[x + HTAPS_MAX/2+1];
2214                 int a5= src[x + HTAPS_MAX/2+2];
2215                 int a6= src[x + HTAPS_MAX/2+3];
2216                 int am=0;
2217                 if(!p || p->fast_mc){
2218                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2219                     tmpI[x]= am;
2220                     am= (am+16)>>5;
2221                 }else{
2222                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2223                     tmpI[x]= am;
2224                     am= (am+32)>>6;
2225                 }
2226
2227                 if(am&(~255)) am= ~(am>>31);
2228                 tmp2[x]= am;
2229             }
2230             tmpI+= 64;
2231             tmp2+= stride;
2232             src += stride;
2233         }
2234         src -= stride*y;
2235     }
2236     src += HTAPS_MAX/2 - 1;
2237     tmp2= tmp2t[1];
2238
2239     if(b&2){
2240         for(y=0; y < b_h; y++){
2241             for(x=0; x < b_w+1; x++){
2242                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2243                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2244                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2245                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2246                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2247                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2248                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2249                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2250                 int am=0;
2251                 if(!p || p->fast_mc)
2252                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2253                 else
2254                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2255
2256                 if(am&(~255)) am= ~(am>>31);
2257                 tmp2[x]= am;
2258             }
2259             src += stride;
2260             tmp2+= stride;
2261         }
2262         src -= stride*y;
2263     }
2264     src += stride*(HTAPS_MAX/2 - 1);
2265     tmp2= tmp2t[2];
2266     tmpI= tmpIt;
2267     if(b&4){
2268         for(y=0; y < b_h; y++){
2269             for(x=0; x < b_w; x++){
2270                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2271                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2272                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2273                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2274                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2275                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2276                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2277                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2278                 int am=0;
2279                 if(!p || p->fast_mc)
2280                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2281                 else
2282                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2283                 if(am&(~255)) am= ~(am>>31);
2284                 tmp2[x]= am;
2285             }
2286             tmpI+= 64;
2287             tmp2+= stride;
2288         }
2289     }
2290
2291     hpel[ 0]= src;
2292     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2293     hpel[ 2]= src + 1;
2294
2295     hpel[ 4]= tmp2t[1];
2296     hpel[ 5]= tmp2t[2];
2297     hpel[ 6]= tmp2t[1] + 1;
2298
2299     hpel[ 8]= src + stride;
2300     hpel[ 9]= hpel[1] + stride;
2301     hpel[10]= hpel[8] + 1;
2302
2303     if(b==15){
2304         uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2305         uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2306         uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2307         uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2308         dx&=7;
2309         dy&=7;
2310         for(y=0; y < b_h; y++){
2311             for(x=0; x < b_w; x++){
2312                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2313                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2314             }
2315             src1+=stride;
2316             src2+=stride;
2317             src3+=stride;
2318             src4+=stride;
2319             dst +=stride;
2320         }
2321     }else{
2322         uint8_t *src1= hpel[l];
2323         uint8_t *src2= hpel[r];
2324         int a= weight[((dx&7) + (8*(dy&7)))];
2325         int b= 8-a;
2326         for(y=0; y < b_h; y++){
2327             for(x=0; x < b_w; x++){
2328                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2329             }
2330             src1+=stride;
2331             src2+=stride;
2332             dst +=stride;
2333         }
2334     }
2335 STOP_TIMER("mc_block")
2336 }
2337
2338 #define mca(dx,dy,b_w)\
2339 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2340     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2341     assert(h==b_w);\
2342     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2343 }
2344
2345 mca( 0, 0,16)
2346 mca( 8, 0,16)
2347 mca( 0, 8,16)
2348 mca( 8, 8,16)
2349 mca( 0, 0,8)
2350 mca( 8, 0,8)
2351 mca( 0, 8,8)
2352 mca( 8, 8,8)
2353
2354 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2355     if(block->type & BLOCK_INTRA){
2356         int x, y;
2357         const int color = block->color[plane_index];
2358         const int color4= color*0x01010101;
2359         if(b_w==32){
2360             for(y=0; y < b_h; y++){
2361                 *(uint32_t*)&dst[0 + y*stride]= color4;
2362                 *(uint32_t*)&dst[4 + y*stride]= color4;
2363                 *(uint32_t*)&dst[8 + y*stride]= color4;
2364                 *(uint32_t*)&dst[12+ y*stride]= color4;
2365                 *(uint32_t*)&dst[16+ y*stride]= color4;
2366                 *(uint32_t*)&dst[20+ y*stride]= color4;
2367                 *(uint32_t*)&dst[24+ y*stride]= color4;
2368                 *(uint32_t*)&dst[28+ y*stride]= color4;
2369             }
2370         }else if(b_w==16){
2371             for(y=0; y < b_h; y++){
2372                 *(uint32_t*)&dst[0 + y*stride]= color4;
2373                 *(uint32_t*)&dst[4 + y*stride]= color4;
2374                 *(uint32_t*)&dst[8 + y*stride]= color4;
2375                 *(uint32_t*)&dst[12+ y*stride]= color4;
2376             }
2377         }else if(b_w==8){
2378             for(y=0; y < b_h; y++){
2379                 *(uint32_t*)&dst[0 + y*stride]= color4;
2380                 *(uint32_t*)&dst[4 + y*stride]= color4;
2381             }
2382         }else if(b_w==4){
2383             for(y=0; y < b_h; y++){
2384                 *(uint32_t*)&dst[0 + y*stride]= color4;
2385             }
2386         }else{
2387             for(y=0; y < b_h; y++){
2388                 for(x=0; x < b_w; x++){
2389                     dst[x + y*stride]= color;
2390                 }
2391             }
2392         }
2393     }else{
2394         uint8_t *src= s->last_picture[block->ref].data[plane_index];
2395         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2396         int mx= block->mx*scale;
2397         int my= block->my*scale;
2398         const int dx= mx&15;
2399         const int dy= my&15;
2400         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2401         sx += (mx>>4) - (HTAPS_MAX/2-1);
2402         sy += (my>>4) - (HTAPS_MAX/2-1);
2403         src += sx + sy*stride;
2404         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2405            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2406             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2407             src= tmp + MB_SIZE;
2408         }
2409 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2410 //        assert(!(b_w&(b_w-1)));
2411         assert(b_w>1 && b_h>1);
2412         assert(tab_index>=0 && tab_index<4 || b_w==32);
2413         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2414             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2415         else if(b_w==32){
2416             int y;
2417             for(y=0; y<b_h; y+=16){
2418                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2419                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2420             }
2421         }else if(b_w==b_h)
2422             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2423         else if(b_w==2*b_h){
2424             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2425             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2426         }else{
2427             assert(2*b_w==b_h);
2428             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2429             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2430         }
2431     }
2432 }
2433
2434 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2435                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2436     int y, x;
2437     IDWTELEM * dst;
2438     for(y=0; y<b_h; y++){
2439         //FIXME ugly misuse of obmc_stride
2440         const uint8_t *obmc1= obmc + y*obmc_stride;
2441         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2442         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2443         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2444         dst = slice_buffer_get_line(sb, src_y + y);
2445         for(x=0; x<b_w; x++){
2446             int v=   obmc1[x] * block[3][x + y*src_stride]
2447                     +obmc2[x] * block[2][x + y*src_stride]
2448                     +obmc3[x] * block[1][x + y*src_stride]
2449                     +obmc4[x] * block[0][x + y*src_stride];
2450
2451             v <<= 8 - LOG2_OBMC_MAX;
2452             if(FRAC_BITS != 8){
2453                 v >>= 8 - FRAC_BITS;
2454             }
2455             if(add){
2456                 v += dst[x + src_x];
2457                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2458                 if(v&(~255)) v= ~(v>>31);
2459                 dst8[x + y*src_stride] = v;
2460             }else{
2461                 dst[x + src_x] -= v;
2462             }
2463         }
2464     }
2465 }
2466
2467 //FIXME name clenup (b_w, block_w, b_width stuff)
2468 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2469     const int b_width = s->b_width  << s->block_max_depth;
2470     const int b_height= s->b_height << s->block_max_depth;
2471     const int b_stride= b_width;
2472     BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473     BlockNode *rt= lt+1;
2474     BlockNode *lb= lt+b_stride;
2475     BlockNode *rb= lb+1;
2476     uint8_t *block[4];
2477     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478     uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2479     uint8_t *ptmp;
2480     int x,y;
2481
2482     if(b_x<0){
2483         lt= rt;
2484         lb= rb;
2485     }else if(b_x + 1 >= b_width){
2486         rt= lt;
2487         rb= lb;
2488     }
2489     if(b_y<0){
2490         lt= lb;
2491         rt= rb;
2492     }else if(b_y + 1 >= b_height){
2493         lb= lt;
2494         rb= rt;
2495     }
2496
2497     if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2498         obmc -= src_x;
2499         b_w += src_x;
2500         if(!sliced && !offset_dst)
2501             dst -= src_x;
2502         src_x=0;
2503     }else if(src_x + b_w > w){
2504         b_w = w - src_x;
2505     }
2506     if(src_y<0){
2507         obmc -= src_y*obmc_stride;
2508         b_h += src_y;
2509         if(!sliced && !offset_dst)
2510             dst -= src_y*dst_stride;
2511         src_y=0;
2512     }else if(src_y + b_h> h){
2513         b_h = h - src_y;
2514     }
2515
2516     if(b_w<=0 || b_h<=0) return;
2517
2518 assert(src_stride > 2*MB_SIZE + 5);
2519     if(!sliced && offset_dst)
2520         dst += src_x + src_y*dst_stride;
2521     dst8+= src_x + src_y*src_stride;
2522 //    src += src_x + src_y*src_stride;
2523
2524     ptmp= tmp + 3*tmp_step;
2525     block[0]= ptmp;
2526     ptmp+=tmp_step;
2527     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2528
2529     if(same_block(lt, rt)){
2530         block[1]= block[0];
2531     }else{
2532         block[1]= ptmp;
2533         ptmp+=tmp_step;
2534         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2535     }
2536
2537     if(same_block(lt, lb)){
2538         block[2]= block[0];
2539     }else if(same_block(rt, lb)){
2540         block[2]= block[1];
2541     }else{
2542         block[2]= ptmp;
2543         ptmp+=tmp_step;
2544         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2545     }
2546
2547     if(same_block(lt, rb) ){
2548         block[3]= block[0];
2549     }else if(same_block(rt, rb)){
2550         block[3]= block[1];
2551     }else if(same_block(lb, rb)){
2552         block[3]= block[2];
2553     }else{
2554         block[3]= ptmp;
2555         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2556     }
2557 #if 0
2558     for(y=0; y<b_h; y++){
2559         for(x=0; x<b_w; x++){
2560             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2561             if(add) dst[x + y*dst_stride] += v;
2562             else    dst[x + y*dst_stride] -= v;
2563         }
2564     }
2565     for(y=0; y<b_h; y++){
2566         uint8_t *obmc2= obmc + (obmc_stride>>1);
2567         for(x=0; x<b_w; x++){
2568             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2569             if(add) dst[x + y*dst_stride] += v;
2570             else    dst[x + y*dst_stride] -= v;
2571         }
2572     }
2573     for(y=0; y<b_h; y++){
2574         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2575         for(x=0; x<b_w; x++){
2576             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2577             if(add) dst[x + y*dst_stride] += v;
2578             else    dst[x + y*dst_stride] -= v;
2579         }
2580     }
2581     for(y=0; y<b_h; y++){
2582         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2583         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2584         for(x=0; x<b_w; x++){
2585             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2586             if(add) dst[x + y*dst_stride] += v;
2587             else    dst[x + y*dst_stride] -= v;
2588         }
2589     }
2590 #else
2591     if(sliced){
2592         START_TIMER
2593
2594         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2595         STOP_TIMER("inner_add_yblock")
2596     }else
2597     for(y=0; y<b_h; y++){
2598         //FIXME ugly misuse of obmc_stride
2599         const uint8_t *obmc1= obmc + y*obmc_stride;
2600         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2601         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2602         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2603         for(x=0; x<b_w; x++){
2604             int v=   obmc1[x] * block[3][x + y*src_stride]
2605                     +obmc2[x] * block[2][x + y*src_stride]
2606                     +obmc3[x] * block[1][x + y*src_stride]
2607                     +obmc4[x] * block[0][x + y*src_stride];
2608
2609             v <<= 8 - LOG2_OBMC_MAX;
2610             if(FRAC_BITS != 8){
2611                 v >>= 8 - FRAC_BITS;
2612             }
2613             if(add){
2614                 v += dst[x + y*dst_stride];
2615                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2616                 if(v&(~255)) v= ~(v>>31);
2617                 dst8[x + y*src_stride] = v;
2618             }else{
2619                 dst[x + y*dst_stride] -= v;
2620             }
2621         }
2622     }
2623 #endif
2624 }
2625
2626 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2627     Plane *p= &s->plane[plane_index];
2628     const int mb_w= s->b_width  << s->block_max_depth;
2629     const int mb_h= s->b_height << s->block_max_depth;
2630     int x, y, mb_x;
2631     int block_size = MB_SIZE >> s->block_max_depth;
2632     int block_w    = plane_index ? block_size/2 : block_size;
2633     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2634     int obmc_stride= plane_index ? block_size : 2*block_size;
2635     int ref_stride= s->current_picture.linesize[plane_index];
2636     uint8_t *dst8= s->current_picture.data[plane_index];
2637     int w= p->width;
2638     int h= p->height;
2639     START_TIMER
2640
2641     if(s->keyframe || (s->avctx->debug&512)){
2642         if(mb_y==mb_h)
2643             return;
2644
2645         if(add){
2646             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2647             {
2648 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2649                 IDWTELEM * line = sb->line[y];
2650                 for(x=0; x<w; x++)
2651                 {
2652 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2653                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654                     v >>= FRAC_BITS;
2655                     if(v&(~255)) v= ~(v>>31);
2656                     dst8[x + y*ref_stride]= v;
2657                 }
2658             }
2659         }else{
2660             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2661             {
2662 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2663                 IDWTELEM * line = sb->line[y];
2664                 for(x=0; x<w; x++)
2665                 {
2666                     line[x] -= 128 << FRAC_BITS;
2667 //                    buf[x + y*w]-= 128<<FRAC_BITS;
2668                 }
2669             }
2670         }
2671
2672         return;
2673     }
2674
2675         for(mb_x=0; mb_x<=mb_w; mb_x++){
2676             START_TIMER
2677
2678             add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2679                        block_w*mb_x - block_w/2,
2680                        block_w*mb_y - block_w/2,
2681                        block_w, block_w,
2682                        w, h,
2683                        w, ref_stride, obmc_stride,
2684                        mb_x - 1, mb_y - 1,
2685                        add, 0, plane_index);
2686
2687             STOP_TIMER("add_yblock")
2688         }
2689
2690     STOP_TIMER("predict_slice")
2691 }
2692
2693 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2694     Plane *p= &s->plane[plane_index];
2695     const int mb_w= s->b_width  << s->block_max_depth;
2696     const int mb_h= s->b_height << s->block_max_depth;
2697     int x, y, mb_x;
2698     int block_size = MB_SIZE >> s->block_max_depth;
2699     int block_w    = plane_index ? block_size/2 : block_size;
2700     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2701     const int obmc_stride= plane_index ? block_size : 2*block_size;
2702     int ref_stride= s->current_picture.linesize[plane_index];
2703     uint8_t *dst8= s->current_picture.data[plane_index];
2704     int w= p->width;
2705     int h= p->height;
2706     START_TIMER
2707
2708     if(s->keyframe || (s->avctx->debug&512)){
2709         if(mb_y==mb_h)
2710             return;
2711
2712         if(add){
2713             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2714                 for(x=0; x<w; x++){
2715                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2716                     v >>= FRAC_BITS;
2717                     if(v&(~255)) v= ~(v>>31);
2718                     dst8[x + y*ref_stride]= v;
2719                 }
2720             }
2721         }else{
2722             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2723                 for(x=0; x<w; x++){
2724                     buf[x + y*w]-= 128<<FRAC_BITS;
2725                 }
2726             }
2727         }
2728
2729         return;
2730     }
2731
2732         for(mb_x=0; mb_x<=mb_w; mb_x++){
2733             START_TIMER
2734
2735             add_yblock(s, 0, NULL, buf, dst8, obmc,
2736                        block_w*mb_x - block_w/2,
2737                        block_w*mb_y - block_w/2,
2738                        block_w, block_w,
2739                        w, h,
2740                        w, ref_stride, obmc_stride,
2741                        mb_x - 1, mb_y - 1,
2742                        add, 1, plane_index);
2743
2744             STOP_TIMER("add_yblock")
2745         }
2746
2747     STOP_TIMER("predict_slice")
2748 }
2749
2750 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2751     const int mb_h= s->b_height << s->block_max_depth;
2752     int mb_y;
2753     for(mb_y=0; mb_y<=mb_h; mb_y++)
2754         predict_slice(s, buf, plane_index, add, mb_y);
2755 }
2756
2757 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2758     int i, x2, y2;
2759     Plane *p= &s->plane[plane_index];
2760     const int block_size = MB_SIZE >> s->block_max_depth;
2761     const int block_w    = plane_index ? block_size/2 : block_size;
2762     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2763     const int obmc_stride= plane_index ? block_size : 2*block_size;
2764     const int ref_stride= s->current_picture.linesize[plane_index];
2765     uint8_t *src= s-> input_picture.data[plane_index];
2766     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2767     const int b_stride = s->b_width << s->block_max_depth;
2768     const int w= p->width;
2769     const int h= p->height;
2770     int index= mb_x + mb_y*b_stride;
2771     BlockNode *b= &s->block[index];
2772     BlockNode backup= *b;
2773     int ab=0;
2774     int aa=0;
2775
2776     b->type|= BLOCK_INTRA;
2777     b->color[plane_index]= 0;
2778     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2779
2780     for(i=0; i<4; i++){
2781         int mb_x2= mb_x + (i &1) - 1;
2782         int mb_y2= mb_y + (i>>1) - 1;
2783         int x= block_w*mb_x2 + block_w/2;
2784         int y= block_w*mb_y2 + block_w/2;
2785
2786         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2787                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2788
2789         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2790             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2791                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2792                 int obmc_v= obmc[index];
2793                 int d;
2794                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2795                 if(x<0) obmc_v += obmc[index + block_w];
2796                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2797                 if(x+block_w>w) obmc_v += obmc[index - block_w];
2798                 //FIXME precalc this or simplify it somehow else
2799
2800                 d = -dst[index] + (1<<(FRAC_BITS-1));
2801                 dst[index] = d;
2802                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2803                 aa += obmc_v * obmc_v; //FIXME precalclate this
2804             }
2805         }
2806     }
2807     *b= backup;
2808
2809     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2810 }
2811
2812 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2813     const int b_stride = s->b_width << s->block_max_depth;
2814     const int b_height = s->b_height<< s->block_max_depth;
2815     int index= x + y*b_stride;
2816     const BlockNode *b     = &s->block[index];
2817     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2818     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2819     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2820     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2821     int dmx, dmy;
2822 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2823 //  int my_context= av_log2(2*FFABS(left->my - top->my));
2824
2825     if(x<0 || x>=b_stride || y>=b_height)
2826         return 0;
2827 /*
2828 1            0      0
2829 01X          1-2    1
2830 001XX        3-6    2-3
2831 0001XXX      7-14   4-7
2832 00001XXXX   15-30   8-15
2833 */
2834 //FIXME try accurate rate
2835 //FIXME intra and inter predictors if surrounding blocks arent the same type
2836     if(b->type & BLOCK_INTRA){
2837         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2838                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
2839                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
2840     }else{
2841         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2842         dmx-= b->mx;
2843         dmy-= b->my;
2844         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2845                     + av_log2(2*FFABS(dmy))
2846                     + av_log2(2*b->ref));
2847     }
2848 }
2849
2850 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2851     Plane *p= &s->plane[plane_index];
2852     const int block_size = MB_SIZE >> s->block_max_depth;
2853     const int block_w    = plane_index ? block_size/2 : block_size;
2854     const int obmc_stride= plane_index ? block_size : 2*block_size;
2855     const int ref_stride= s->current_picture.linesize[plane_index];
2856     uint8_t *dst= s->current_picture.data[plane_index];
2857     uint8_t *src= s->  input_picture.data[plane_index];
2858     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2859     uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2860     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2861     const int b_stride = s->b_width << s->block_max_depth;
2862     const int b_height = s->b_height<< s->block_max_depth;
2863     const int w= p->width;
2864     const int h= p->height;
2865     int distortion;
2866     int rate= 0;
2867     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2868     int sx= block_w*mb_x - block_w/2;
2869     int sy= block_w*mb_y - block_w/2;
2870     int x0= FFMAX(0,-sx);
2871     int y0= FFMAX(0,-sy);
2872     int x1= FFMIN(block_w*2, w-sx);
2873     int y1= FFMIN(block_w*2, h-sy);
2874     int i,x,y;
2875
2876     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2877
2878     for(y=y0; y<y1; y++){
2879         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2880         const IDWTELEM *pred1 = pred + y*obmc_stride;
2881         uint8_t *cur1 = cur + y*ref_stride;
2882         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2883         for(x=x0; x<x1; x++){
2884 #if FRAC_BITS >= LOG2_OBMC_MAX
2885             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2886 #else
2887             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2888 #endif
2889             v = (v + pred1[x]) >> FRAC_BITS;
2890             if(v&(~255)) v= ~(v>>31);
2891             dst1[x] = v;
2892         }
2893     }
2894
2895     /* copy the regions where obmc[] = (uint8_t)256 */
2896     if(LOG2_OBMC_MAX == 8
2897         && (mb_x == 0 || mb_x == b_stride-1)
2898         && (mb_y == 0 || mb_y == b_height-1)){
2899         if(mb_x == 0)
2900             x1 = block_w;
2901         else
2902             x0 = block_w;
2903         if(mb_y == 0)
2904             y1 = block_w;
2905         else
2906             y0 = block_w;
2907         for(y=y0; y<y1; y++)
2908             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2909     }
2910
2911     if(block_w==16){
2912         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2913         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2914         /* FIXME cmps overlap but don't cover the wavelet's whole support,
2915          * so improving the score of one block is not strictly guaranteed to
2916          * improve the score of the whole frame, so iterative motion est
2917          * doesn't always converge. */
2918         if(s->avctx->me_cmp == FF_CMP_W97)
2919             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2920         else if(s->avctx->me_cmp == FF_CMP_W53)
2921             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2922         else{
2923             distortion = 0;
2924             for(i=0; i<4; i++){
2925                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2926                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2927             }
2928         }
2929     }else{
2930         assert(block_w==8);
2931         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2932     }
2933
2934     if(plane_index==0){
2935         for(i=0; i<4; i++){
2936 /* ..RRr
2937  * .RXx.
2938  * rxx..
2939  */
2940             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2941         }
2942         if(mb_x == b_stride-2)
2943             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2944     }
2945     return distortion + rate*penalty_factor;
2946 }
2947
2948 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2949     int i, y2;
2950     Plane *p= &s->plane[plane_index];
2951     const int block_size = MB_SIZE >> s->block_max_depth;
2952     const int block_w    = plane_index ? block_size/2 : block_size;
2953     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2954     const int obmc_stride= plane_index ? block_size : 2*block_size;
2955     const int ref_stride= s->current_picture.linesize[plane_index];
2956     uint8_t *dst= s->current_picture.data[plane_index];
2957     uint8_t *src= s-> input_picture.data[plane_index];
2958     static const IDWTELEM zero_dst[4096]; //FIXME
2959     const int b_stride = s->b_width << s->block_max_depth;
2960     const int w= p->width;
2961     const int h= p->height;
2962     int distortion= 0;
2963     int rate= 0;
2964     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2965
2966     for(i=0; i<9; i++){
2967         int mb_x2= mb_x + (i%3) - 1;
2968         int mb_y2= mb_y + (i/3) - 1;
2969         int x= block_w*mb_x2 + block_w/2;
2970         int y= block_w*mb_y2 + block_w/2;
2971
2972         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2973                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2974
2975         //FIXME find a cleaner/simpler way to skip the outside stuff
2976         for(y2= y; y2<0; y2++)
2977             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2978         for(y2= h; y2<y+block_w; y2++)
2979             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2980         if(x<0){
2981             for(y2= y; y2<y+block_w; y2++)
2982                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2983         }
2984         if(x+block_w > w){
2985             for(y2= y; y2<y+block_w; y2++)
2986                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2987         }
2988
2989         assert(block_w== 8 || block_w==16);
2990         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2991     }
2992
2993     if(plane_index==0){
2994         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2995         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2996
2997 /* ..RRRr
2998  * .RXXx.
2999  * .RXXx.
3000  * rxxx.
3001  */
3002         if(merged)
3003             rate = get_block_bits(s, mb_x, mb_y, 2);
3004         for(i=merged?4:0; i<9; i++){
3005             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3006             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3007         }
3008     }
3009     return distortion + rate*penalty_factor;
3010 }
3011
3012 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3013     const int b_stride= s->b_width << s->block_max_depth;
3014     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3015     BlockNode backup= *block;
3016     int rd, index, value;
3017
3018     assert(mb_x>=0 && mb_y>=0);
3019     assert(mb_x<b_stride);
3020
3021     if(intra){
3022         block->color[0] = p[0];
3023         block->color[1] = p[1];
3024         block->color[2] = p[2];
3025         block->type |= BLOCK_INTRA;
3026     }else{
3027         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3028         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3029         if(s->me_cache[index] == value)
3030             return 0;
3031         s->me_cache[index]= value;
3032
3033         block->mx= p[0];
3034         block->my= p[1];
3035         block->type &= ~BLOCK_INTRA;
3036     }
3037
3038     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3039
3040 //FIXME chroma
3041     if(rd < *best_rd){
3042         *best_rd= rd;
3043         return 1;
3044     }else{
3045         *block= backup;
3046         return 0;
3047     }
3048 }
3049
3050 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3051 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3052     int p[2] = {p0, p1};
3053     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3054 }
3055
3056 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3057     const int b_stride= s->b_width << s->block_max_depth;
3058     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3059     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3060     int rd, index, value;
3061
3062     assert(mb_x>=0 && mb_y>=0);
3063     assert(mb_x<b_stride);
3064     assert(((mb_x|mb_y)&1) == 0);
3065
3066     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3067     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3068     if(s->me_cache[index] == value)
3069         return 0;
3070     s->me_cache[index]= value;
3071
3072     block->mx= p0;
3073     block->my= p1;
3074     block->ref= ref;
3075     block->type &= ~BLOCK_INTRA;
3076     block[1]= block[b_stride]= block[b_stride+1]= *block;
3077
3078     rd= get_4block_rd(s, mb_x, mb_y, 0);
3079
3080 //FIXME chroma
3081     if(rd < *best_rd){
3082         *best_rd= rd;
3083         return 1;
3084     }else{
3085         block[0]= backup[0];
3086         block[1]= backup[1];
3087         block[b_stride]= backup[2];
3088         block[b_stride+1]= backup[3];
3089         return 0;
3090     }
3091 }
3092
3093 static void iterative_me(SnowContext *s){
3094     int pass, mb_x, mb_y;
3095     const int b_width = s->b_width  << s->block_max_depth;
3096     const int b_height= s->b_height << s->block_max_depth;
3097     const int b_stride= b_width;
3098     int color[3];
3099
3100     {
3101         RangeCoder r = s->c;
3102         uint8_t state[sizeof(s->block_state)];
3103         memcpy(state, s->block_state, sizeof(s->block_state));
3104         for(mb_y= 0; mb_y<s->b_height; mb_y++)
3105             for(mb_x= 0; mb_x<s->b_width; mb_x++)
3106                 encode_q_branch(s, 0, mb_x, mb_y);
3107         s->c = r;
3108         memcpy(s->block_state, state, sizeof(s->block_state));
3109     }
3110
3111     for(pass=0; pass<25; pass++){
3112         int change= 0;
3113
3114         for(mb_y= 0; mb_y<b_height; mb_y++){
3115             for(mb_x= 0; mb_x<b_width; mb_x++){
3116                 int dia_change, i, j, ref;
3117                 int best_rd= INT_MAX, ref_rd;
3118                 BlockNode backup, ref_b;
3119                 const int index= mb_x + mb_y * b_stride;
3120                 BlockNode *block= &s->block[index];
3121                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3122                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3123                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3124                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3125                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3126                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3127                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3128                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3129                 const int b_w= (MB_SIZE >> s->block_max_depth);
3130                 uint8_t obmc_edged[b_w*2][b_w*2];
3131
3132                 if(pass && (block->type & BLOCK_OPT))
3133                     continue;
3134                 block->type |= BLOCK_OPT;
3135
3136                 backup= *block;
3137
3138                 if(!s->me_cache_generation)
3139                     memset(s->me_cache, 0, sizeof(s->me_cache));
3140                 s->me_cache_generation += 1<<22;
3141
3142                 //FIXME precalc
3143                 {
3144                     int x, y;
3145                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3146                     if(mb_x==0)
3147                         for(y=0; y<b_w*2; y++)
3148                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3149                     if(mb_x==b_stride-1)
3150                         for(y=0; y<b_w*2; y++)
3151                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3152                     if(mb_y==0){
3153                         for(x=0; x<b_w*2; x++)
3154                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
3155                         for(y=1; y<b_w; y++)
3156                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3157                     }
3158                     if(mb_y==b_height-1){
3159                         for(x=0; x<b_w*2; x++)
3160                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3161                         for(y=b_w; y<b_w*2-1; y++)
3162                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3163                     }
3164                 }
3165
3166                 //skip stuff outside the picture
3167                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3168                 {
3169                     uint8_t *src= s->  input_picture.data[0];
3170                     uint8_t *dst= s->current_picture.data[0];
3171                     const int stride= s->current_picture.linesize[0];
3172                     const int block_w= MB_SIZE >> s->block_max_depth;
3173                     const int sx= block_w*mb_x - block_w/2;
3174                     const int sy= block_w*mb_y - block_w/2;
3175                     const int w= s->plane[0].width;
3176                     const int h= s->plane[0].height;
3177                     int y;
3178
3179                     for(y=sy; y<0; y++)
3180                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3181                     for(y=h; y<sy+block_w*2; y++)
3182                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3183                     if(sx<0){
3184                         for(y=sy; y<sy+block_w*2; y++)
3185                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3186                     }
3187                     if(sx+block_w*2 > w){
3188                         for(y=sy; y<sy+block_w*2; y++)
3189                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3190                     }
3191                 }
3192
3193                 // intra(black) = neighbors' contribution to the current block
3194                 for(i=0; i<3; i++)
3195                     color[i]= get_dc(s, mb_x, mb_y, i);
3196
3197                 // get previous score (cannot be cached due to OBMC)
3198                 if(pass > 0 && (block->type&BLOCK_INTRA)){
3199                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
3200                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3201                 }else
3202                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3203
3204                 ref_b= *block;
3205                 ref_rd= best_rd;
3206                 for(ref=0; ref < s->ref_frames; ref++){
3207                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3208                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3209                         continue;
3210                     block->ref= ref;
3211                     best_rd= INT_MAX;
3212
3213                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3214                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3215                     if(tb)
3216                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3217                     if(lb)
3218                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3219                     if(rb)
3220                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3221                     if(bb)
3222                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3223
3224                     /* fullpel ME */
3225                     //FIXME avoid subpel interpol / round to nearest integer
3226                     do{
3227                         dia_change=0;
3228                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3229                             for(j=0; j<i; j++){
3230                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3231                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3232                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3234                             }
3235                         }
3236                     }while(dia_change);
3237                     /* subpel ME */
3238                     do{
3239                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3240                         dia_change=0;
3241                         for(i=0; i<8; i++)
3242                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3243                     }while(dia_change);
3244                     //FIXME or try the standard 2 pass qpel or similar
3245
3246                     mvr[0][0]= block->mx;
3247                     mvr[0][1]= block->my;
3248                     if(ref_rd > best_rd){
3249                         ref_rd= best_rd;
3250                         ref_b= *block;
3251                     }
3252                 }
3253                 best_rd= ref_rd;
3254                 *block= ref_b;
3255 #if 1
3256                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3257                 //FIXME RD style color selection
3258 #endif
3259                 if(!same_block(block, &backup)){
3260                     if(tb ) tb ->type &= ~BLOCK_OPT;
3261                     if(lb ) lb ->type &= ~BLOCK_OPT;
3262                     if(rb ) rb ->type &= ~BLOCK_OPT;
3263                     if(bb ) bb ->type &= ~BLOCK_OPT;
3264                     if(tlb) tlb->type &= ~BLOCK_OPT;
3265                     if(trb) trb->type &= ~BLOCK_OPT;
3266                     if(blb) blb->type &= ~BLOCK_OPT;
3267                     if(brb) brb->type &= ~BLOCK_OPT;
3268                     change ++;
3269                 }
3270             }
3271         }
3272         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3273         if(!change)
3274             break;
3275     }
3276
3277     if(s->block_max_depth == 1){
3278         int change= 0;
3279         for(mb_y= 0; mb_y<b_height; mb_y+=2){
3280             for(mb_x= 0; mb_x<b_width; mb_x+=2){
3281                 int i;
3282                 int best_rd, init_rd;
3283                 const int index= mb_x + mb_y * b_stride;
3284                 BlockNode *b[4];
3285
3286                 b[0]= &s->block[index];
3287                 b[1]= b[0]+1;
3288                 b[2]= b[0]+b_stride;
3289                 b[3]= b[2]+1;
3290                 if(same_block(b[0], b[1]) &&
3291                    same_block(b[0], b[2]) &&
3292                    same_block(b[0], b[3]))
3293                     continue;
3294
3295                 if(!s->me_cache_generation)
3296                     memset(s->me_cache, 0, sizeof(s->me_cache));
3297                 s->me_cache_generation += 1<<22;
3298
3299                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3300
3301                 //FIXME more multiref search?
3302                 check_4block_inter(s, mb_x, mb_y,
3303                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3304                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3305
3306                 for(i=0; i<4; i++)
3307                     if(!(b[i]->type&BLOCK_INTRA))
3308                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3309
3310                 if(init_rd != best_rd)
3311                     change++;
3312             }
3313         }
3314         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3315     }
3316 }
3317
3318 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3319     const int level= b->level;
3320     const int w= b->width;
3321     const int h= b->height;
3322     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3323     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3324     int x,y, thres1, thres2;
3325 //    START_TIMER
3326
3327     if(s->qlog == LOSSLESS_QLOG){
3328         for(y=0; y<h; y++)
3329             for(x=0; x<w; x++)
3330                 dst[x + y*stride]= src[x + y*stride];
3331         return;
3332     }
3333
3334     bias= bias ? 0 : (3*qmul)>>3;
3335     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3336     thres2= 2*thres1;
3337
3338     if(!bias){
3339         for(y=0; y<h; y++){
3340             for(x=0; x<w; x++){
3341                 int i= src[x + y*stride];
3342
3343                 if((unsigned)(i+thres1) > thres2){
3344                     if(i>=0){
3345                         i<<= QEXPSHIFT;
3346                         i/= qmul; //FIXME optimize
3347                         dst[x + y*stride]=  i;
3348                     }else{
3349                         i= -i;
3350                         i<<= QEXPSHIFT;
3351                         i/= qmul; //FIXME optimize
3352                         dst[x + y*stride]= -i;
3353                     }
3354                 }else
3355                     dst[x + y*stride]= 0;
3356             }
3357         }
3358     }else{
3359         for(y=0; y<h; y++){
3360             for(x=0; x<w; x++){
3361                 int i= src[x + y*stride];
3362
3363                 if((unsigned)(i+thres1) > thres2){
3364                     if(i>=0){
3365                         i<<= QEXPSHIFT;
3366                         i= (i + bias) / qmul; //FIXME optimize
3367                         dst[x + y*stride]=  i;
3368                     }else{
3369                         i= -i;
3370                         i<<= QEXPSHIFT;
3371                         i= (i + bias) / qmul; //FIXME optimize
3372                         dst[x + y*stride]= -i;
3373                     }
3374                 }else
3375                     dst[x + y*stride]= 0;
3376             }
3377         }
3378     }
3379     if(level+1 == s->spatial_decomposition_count){
3380 //        STOP_TIMER("quantize")
3381     }
3382 }
3383
3384 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3385     const int w= b->width;
3386     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3387     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3388     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3389     int x,y;
3390     START_TIMER
3391
3392     if(s->qlog == LOSSLESS_QLOG) return;
3393
3394     for(y=start_y; y<end_y; y++){
3395 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3396         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3397         for(x=0; x<w; x++){
3398             int i= line[x];
3399             if(i<0){
3400                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3401             }else if(i>0){
3402                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3403             }
3404         }
3405     }
3406     if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3407         STOP_TIMER("dquant")
3408     }
3409 }
3410
3411 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3412     const int w= b->width;
3413     const int h= b->height;
3414     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3415     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3416     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3417     int x,y;
3418     START_TIMER
3419
3420     if(s->qlog == LOSSLESS_QLOG) return;
3421
3422     for(y=0; y<h; y++){
3423         for(x=0; x<w; x++){
3424             int i= src[x + y*stride];
3425             if(i<0){
3426                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3427             }else if(i>0){
3428                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3429             }
3430         }
3431     }
3432     if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3433         STOP_TIMER("dquant")
3434     }
3435 }
3436
3437 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3438     const int w= b->width;
3439     const int h= b->height;
3440     int x,y;
3441
3442     for(y=h-1; y>=0; y--){
3443         for(x=w-1; x>=0; x--){
3444             int i= x + y*stride;
3445
3446             if(x){
3447                 if(use_median){
3448                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3449                     else  src[i] -= src[i - 1];
3450                 }else{
3451                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3452                     else  src[i] -= src[i - 1];
3453                 }
3454             }else{
3455                 if(y) src[i] -= src[i - stride];
3456             }
3457         }
3458     }
3459 }
3460
3461 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3462     const int w= b->width;
3463     int x,y;
3464
3465 //    START_TIMER
3466
3467     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3468     IDWTELEM * prev;
3469
3470     if (start_y != 0)
3471         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3472
3473     for(y=start_y; y<end_y; y++){
3474         prev = line;
3475 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3476         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3477         for(x=0; x<w; x++){
3478             if(x){
3479                 if(use_median){
3480                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3481                     else  line[x] += line[x - 1];
3482                 }else{
3483                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3484                     else  line[x] += line[x - 1];
3485                 }
3486             }else{
3487                 if(y) line[x] += prev[x];
3488             }
3489         }
3490     }
3491
3492 //    STOP_TIMER("correlate")
3493 }
3494
3495 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3496     const int w= b->width;
3497     const int h= b->height;
3498     int x,y;
3499
3500     for(y=0; y<h; y++){
3501         for(x=0; x<w; x++){
3502             int i= x + y*stride;
3503
3504             if(x){
3505                 if(use_median){
3506                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3507                     else  src[i] += src[i - 1];
3508                 }else{
3509                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3510                     else  src[i] += src[i - 1];
3511                 }
3512             }else{
3513                 if(y) src[i] += src[i - stride];
3514             }
3515         }
3516     }
3517 }
3518
3519 static void encode_header(SnowContext *s){
3520     int plane_index, level, orientation, i;
3521     uint8_t kstate[32];
3522
3523     memset(kstate, MID_STATE, sizeof(kstate));
3524
3525     put_rac(&s->c, kstate, s->keyframe);
3526     if(s->keyframe || s->always_reset){
3527         reset_contexts(s);
3528         s->last_spatial_decomposition_type=
3529         s->last_qlog=
3530         s->last_qbias=
3531         s->last_mv_scale=
3532         s->last_block_max_depth= 0;
3533         for(plane_index=0; plane_index<2; plane_index++){
3534             Plane *p= &s->plane[plane_index];
3535             p->last_htaps=0;
3536             p->last_diag_mc=0;
3537             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3538         }
3539     }
3540     if(s->keyframe){
3541         put_symbol(&s->c, s->header_state, s->version, 0);
3542         put_rac(&s->c, s->header_state, s->always_reset);
3543         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3544         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3545         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3546         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3547         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3548         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3549         put_rac(&s->c, s->header_state, s->spatial_scalability);
3550 //        put_rac(&s->c, s->header_state, s->rate_scalability);
3551         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3552
3553         for(plane_index=0; plane_index<2; plane_index++){
3554             for(level=0; level<s->spatial_decomposition_count; level++){
3555                 for(orientation=level ? 1:0; orientation<4; orientation++){
3556                     if(orientation==2) continue;
3557                     put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3558                 }
3559             }
3560         }
3561     }
3562
3563     if(!s->keyframe){
3564         int update_mc=0;
3565         for(plane_index=0; plane_index<2; plane_index++){
3566             Plane *p= &s->plane[plane_index];
3567             update_mc |= p->last_htaps   != p->htaps;
3568             update_mc |= p->last_diag_mc != p->diag_mc;
3569             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3570         }
3571         if(!s->always_reset)
3572             put_rac(&s->c, s->header_state, update_mc);
3573         if(update_mc){
3574             for(plane_index=0; plane_index<2; plane_index++){
3575                 Plane *p= &s->plane[plane_index];
3576                 put_rac(&s->c, s->header_state, p->diag_mc);
3577                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3578                 for(i= p->htaps/2; i; i--)
3579                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3580
3581                 p->last_diag_mc= p->diag_mc;
3582                 p->last_htaps= p->htaps;
3583                 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3584             }
3585         }
3586     }
3587
3588     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3589     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3590     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3591     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3592     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3593
3594     s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3595     s->last_qlog                      = s->qlog;
3596     s->last_qbias                     = s->qbias;
3597     s->last_mv_scale                  = s->mv_scale;
3598     s->last_block_max_depth           = s->block_max_depth;
3599 }
3600
3601 static int decode_header(SnowContext *s){
3602     int plane_index, level, orientation;
3603     uint8_t kstate[32];
3604
3605     memset(kstate, MID_STATE, sizeof(kstate));
3606
3607     s->keyframe= get_rac(&s->c, kstate);
3608     if(s->keyframe || s->always_reset){
3609         reset_contexts(s);
3610         s->spatial_decomposition_type=
3611         s->qlog=
3612         s->qbias=
3613         s->mv_scale=
3614         s->block_max_depth= 0;
3615     }
3616     if(s->keyframe){
3617         s->version= get_symbol(&s->c, s->header_state, 0);
3618         if(s->version>0){
3619             av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3620             return -1;
3621         }
3622         s->always_reset= get_rac(&s->c, s->header_state);
3623         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3624         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3625         s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3626         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3627         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3628         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3629         s->spatial_scalability= get_rac(&s->c, s->header_state);
3630 //        s->rate_scalability= get_rac(&s->c, s->header_state);
3631         s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3632
3633         for(plane_index=0; plane_index<3; plane_index++){
3634             for(level=0; level<s->spatial_decomposition_count; level++){
3635                 for(orientation=level ? 1:0; orientation<4; orientation++){
3636                     int q;
3637                     if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3638                     else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3639                     else                    q= get_symbol(&s->c, s->header_state, 1);
3640                     s->plane[plane_index].band[level][orientation].qlog= q;
3641                 }
3642             }
3643         }
3644     }
3645
3646     if(!s->keyframe){
3647         if(s->always_reset || get_rac(&s->c, s->header_state)){
3648             for(plane_index=0; plane_index<2; plane_index++){
3649                 int htaps, i, sum=0, absum=0;
3650                 Plane *p= &s->plane[plane_index];
3651                 p->diag_mc= get_rac(&s->c, s->header_state);
3652                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3653                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3654                     return -1;
3655                 p->htaps= htaps;
3656                 for(i= htaps/2; i; i--){
3657                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3658                     sum += p->hcoeff[i];
3659                 }
3660                 p->hcoeff[0]= 32-sum;
3661             }
3662             s->plane[2].diag_mc= s->plane[1].diag_mc;
3663             s->plane[2].htaps  = s->plane[1].htaps;
3664             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3665         }
3666     }
3667
3668     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3669     if(s->spatial_decomposition_type > 1){
3670         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3671         return -1;
3672     }
3673
3674     s->qlog           += get_symbol(&s->c, s->header_state, 1);
3675     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3676     s->qbias          += get_symbol(&s->c, s->header_state, 1);
3677     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3678     if(s->block_max_depth > 1 || s->block_max_depth < 0){
3679         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3680         s->block_max_depth= 0;
3681         return -1;
3682     }
3683
3684     return 0;
3685 }
3686
3687 static void init_qexp(void){
3688     int i;
3689     double v=128;
3690
3691     for(i=0; i<QROOT; i++){
3692         qexp[i]= lrintf(v);
3693         v *= pow(2, 1.0 / QROOT);
3694     }
3695 }
3696
3697 static int common_init(AVCodecContext *avctx){
3698     SnowContext *s = avctx->priv_data;
3699     int width, height;
3700     int i, j;
3701
3702     s->avctx= avctx;
3703
3704     dsputil_init(&s->dsp, avctx);
3705
3706 #define mcf(dx,dy)\
3707     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3708     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3709         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3710     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3711     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3712         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3713
3714     mcf( 0, 0)
3715     mcf( 4, 0)
3716     mcf( 8, 0)
3717     mcf(12, 0)
3718     mcf( 0, 4)
3719     mcf( 4, 4)
3720     mcf( 8, 4)
3721     mcf(12, 4)
3722     mcf( 0, 8)
3723     mcf( 4, 8)
3724     mcf( 8, 8)
3725     mcf(12, 8)
3726     mcf( 0,12)
3727     mcf( 4,12)
3728     mcf( 8,12)
3729     mcf(12,12)
3730
3731 #define mcfh(dx,dy)\
3732     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3733     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3734         mc_block_hpel ## dx ## dy ## 16;\
3735     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3736     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3737         mc_block_hpel ## dx ## dy ## 8;
3738
3739     mcfh(0, 0)
3740     mcfh(8, 0)
3741     mcfh(0, 8)
3742     mcfh(8, 8)
3743
3744     if(!qexp[0])
3745         init_qexp();
3746
3747 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3748
3749     width= s->avctx->width;
3750     height= s->avctx->height;
3751
3752     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3753     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
3754
3755     for(i=0; i<MAX_REF_FRAMES; i++)
3756         for(j=0; j<MAX_REF_FRAMES; j++)
3757             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3758
3759     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3760
3761     return 0;
3762 }
3763
3764 static int common_init_after_header(AVCodecContext *avctx){
3765     SnowContext *s = avctx->priv_data;
3766     int plane_index, level, orientation;
3767
3768     for(plane_index=0; plane_index<3; plane_index++){
3769         int w= s->avctx->width;
3770         int h= s->avctx->height;
3771
3772         if(plane_index){
3773             w>>= s->chroma_h_shift;
3774             h>>= s->chroma_v_shift;
3775         }
3776         s->plane[plane_index].width = w;
3777         s->plane[plane_index].height= h;
3778
3779 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3780         for(level=s->spatial_decomposition_count-1; level>=0; level--){
3781             for(orientation=level ? 1 : 0; orientation<4; orientation++){
3782                 SubBand *b= &s->plane[plane_index].band[level][orientation];
3783
3784                 b->buf= s->spatial_dwt_buffer;
3785                 b->level= level;
3786                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3787                 b->width = (w + !(orientation&1))>>1;
3788                 b->height= (h + !(orientation>1))>>1;
3789
3790                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3791                 b->buf_x_offset = 0;
3792                 b->buf_y_offset = 0;
3793
3794                 if(orientation&1){
3795                     b->buf += (w+1)>>1;
3796                     b->buf_x_offset = (w+1)>>1;
3797                 }
3798                 if(orientation>1){
3799                     b->buf += b->stride>>1;
3800                     b->buf_y_offset = b->stride_line >> 1;
3801                 }
3802                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3803
3804                 if(level)
3805                     b->parent= &s->plane[plane_index].band[level-1][orientation];
3806                 //FIXME avoid this realloc
3807                 av_freep(&b->x_coeff);
3808                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3809             }
3810             w= (w+1)>>1;
3811             h= (h+1)>>1;
3812         }
3813     }
3814
3815     return 0;
3816 }
3817
3818 static int qscale2qlog(int qscale){
3819     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3820            + 61*QROOT/8; //<64 >60
3821 }
3822
3823 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3824 {
3825     /* estimate the frame's complexity as a sum of weighted dwt coefs.
3826      * FIXME we know exact mv bits at this point,
3827      * but ratecontrol isn't set up to include them. */
3828     uint32_t coef_sum= 0;
3829     int level, orientation, delta_qlog;
3830
3831     for(level=0; level<s->spatial_decomposition_count; level++){
3832         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3833             SubBand *b= &s->plane[0].band[level][orientation];
3834             IDWTELEM *buf= b->ibuf;
3835             const int w= b->width;
3836             const int h= b->height;
3837             const int stride= b->stride;
3838             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3839             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3840             const int qdiv= (1<<16)/qmul;
3841             int x, y;
3842             //FIXME this is ugly
3843             for(y=0; y<h; y++)
3844                 for(x=0; x<w; x++)
3845                     buf[x+y*stride]= b->buf[x+y*stride];
3846             if(orientation==0)
3847                 decorrelate(s, b, buf, stride, 1, 0);
3848             for(y=0; y<h; y++)
3849                 for(x=0; x<w; x++)
3850                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3851         }
3852     }
3853
3854     /* ugly, ratecontrol just takes a sqrt again */
3855     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3856     assert(coef_sum < INT_MAX);
3857
3858     if(pict->pict_type == I_TYPE){
3859         s->m.current_picture.mb_var_sum= coef_sum;
3860         s->m.current_picture.mc_mb_var_sum= 0;
3861     }else{
3862         s->m.current_picture.mc_mb_var_sum= coef_sum;
3863         s->m.current_picture.mb_var_sum= 0;
3864     }
3865
3866     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3867     if (pict->quality < 0)
3868         return INT_MIN;
3869     s->lambda= pict->quality * 3/2;
3870     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3871     s->qlog+= delta_qlog;
3872     return delta_qlog;
3873 }
3874
3875 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3876     int width = p->width;
3877     int height= p->height;
3878     int level, orientation, x, y;
3879
3880     for(level=0; level<s->spatial_decomposition_count; level++){
3881         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3882             SubBand *b= &p->band[level][orientation];
3883             IDWTELEM *ibuf= b->ibuf;
3884             int64_t error=0;
3885
3886             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3887             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3888             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3889             for(y=0; y<height; y++){
3890                 for(x=0; x<width; x++){
3891                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3892                     error += d*d;
3893                 }
3894             }
3895
3896             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3897 //            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3898         }
3899     }
3900 }
3901
3902 static int encode_init(AVCodecContext *avctx)
3903 {
3904     SnowContext *s = avctx->priv_data;
3905     int plane_index;
3906
3907     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3908         av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3909                "use vstrict=-2 / -strict -2 to use it anyway\n");
3910         return -1;
3911     }
3912
3913     if(avctx->prediction_method == DWT_97
3914        && (avctx->flags & CODEC_FLAG_QSCALE)
3915        && avctx->global_quality == 0){
3916         av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3917         return -1;
3918     }
3919
3920     s->spatial_decomposition_count= 5;
3921     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3922
3923     s->chroma_h_shift= 1; //FIXME XXX
3924     s->chroma_v_shift= 1;
3925
3926     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3927     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3928
3929     for(plane_index=0; plane_index<3; plane_index++){
3930         s->plane[plane_index].diag_mc= 1;
3931         s->plane[plane_index].htaps= 6;
3932         s->plane[plane_index].hcoeff[0]=  40;
3933         s->plane[plane_index].hcoeff[1]= -10;
3934         s->plane[plane_index].hcoeff[2]=   2;
3935         s->plane[plane_index].fast_mc= 1;
3936     }
3937
3938     common_init(avctx);
3939     common_init_after_header(avctx);
3940     alloc_blocks(s);
3941
3942     s->version=0;
3943
3944     s->m.avctx   = avctx;
3945     s->m.flags   = avctx->flags;
3946     s->m.bit_rate= avctx->bit_rate;
3947
3948     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3949     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3950     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3951     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3952     h263_encode_init(&s->m); //mv_penalty
3953
3954     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3955
3956     if(avctx->flags&CODEC_FLAG_PASS1){
3957         if(!avctx->stats_out)
3958             avctx->stats_out = av_mallocz(256);
3959     }
3960     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3961         if(ff_rate_control_init(&s->m) < 0)
3962             return -1;
3963     }
3964     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3965
3966     for(plane_index=0; plane_index<3; plane_index++){
3967         calculate_vissual_weight(s, &s->plane[plane_index]);
3968     }
3969
3970
3971     avctx->coded_frame= &s->current_picture;
3972     switch(avctx->pix_fmt){
3973 //    case PIX_FMT_YUV444P:
3974 //    case PIX_FMT_YUV422P:
3975     case PIX_FMT_YUV420P:
3976     case PIX_FMT_GRAY8:
3977 //    case PIX_FMT_YUV411P:
3978 //    case PIX_FMT_YUV410P:
3979         s->colorspace_type= 0;
3980         break;
3981 /*    case PIX_FMT_RGB32:
3982         s->colorspace= 1;
3983         break;*/
3984     default:
3985         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3986         return -1;
3987     }
3988 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3989     s->chroma_h_shift= 1;
3990     s->chroma_v_shift= 1;
3991
3992     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3993     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3994
3995     s->avctx->get_buffer(s->avctx, &s->input_picture);
3996
3997     if(s->avctx->me_method == ME_ITER){
3998         int i;
3999         int size= s->b_width * s->b_height << 2*s->block_max_depth;
4000         for(i=0; i<s->max_ref_frames; i++){
4001             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4002             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4003         }
4004     }
4005
4006     return 0;
4007 }
4008
4009 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4010     int p,x,y;
4011
4012     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4013
4014     for(p=0; p<3; p++){
4015         int is_chroma= !!p;
4016         int w= s->avctx->width  >>is_chroma;
4017         int h= s->avctx->height >>is_chroma;
4018         int ls= frame->linesize[p];
4019         uint8_t *src= frame->data[p];
4020
4021         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4022         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4023         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4024
4025         halfpel[0][p]= src;
4026         for(y=0; y<h; y++){
4027             for(x=0; x<w; x++){
4028                 int i= y*ls + x;
4029
4030                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4031             }
4032         }
4033         for(y=0; y<h; y++){
4034             for(x=0; x<w; x++){
4035                 int i= y*ls + x;
4036
4037                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4038             }
4039         }
4040         src= halfpel[1][p];
4041         for(y=0; y<h; y++){
4042             for(x=0; x<w; x++){
4043                 int i= y*ls + x;
4044
4045                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4046             }
4047         }
4048
4049 //FIXME border!
4050     }
4051 }
4052
4053 static int frame_start(SnowContext *s){
4054    AVFrame tmp;
4055    int w= s->avctx->width; //FIXME round up to x16 ?
4056    int h= s->avctx->height;
4057
4058     if(s->current_picture.data[0]){
4059         draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4060         draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4061         draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4062     }
4063
4064     tmp= s->last_picture[s->max_ref_frames-1];
4065     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4066     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4067 #ifdef USE_HALFPEL_PLANE
4068     if(s->current_picture.data[0])
4069         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4070 #endif
4071     s->last_picture[0]= s->current_picture;
4072     s->current_picture= tmp;
4073
4074     if(s->keyframe){
4075         s->ref_frames= 0;
4076     }else{
4077         int i;
4078         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4079             if(i && s->last_picture[i-1].key_frame)
4080                 break;
4081         s->ref_frames= i;
4082     }
4083
4084     s->current_picture.reference= 1;
4085     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4086         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4087         return -1;
4088     }
4089
4090     s->current_picture.key_frame= s->keyframe;
4091
4092     return 0;
4093 }
4094
4095 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4096     SnowContext *s = avctx->priv_data;
4097     RangeCoder * const c= &s->c;
4098     AVFrame *pict = data;
4099     const int width= s->avctx->width;
4100     const int height= s->avctx->height;
4101     int level, orientation, plane_index, i, y;
4102     uint8_t rc_header_bak[sizeof(s->header_state)];
4103     uint8_t rc_block_bak[sizeof(s->block_state)];
4104
4105     ff_init_range_encoder(c, buf, buf_size);
4106     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4107
4108     for(i=0; i<3; i++){
4109         int shift= !!i;
4110         for(y=0; y<(height>>shift); y++)
4111             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4112                    &pict->data[i][y * pict->linesize[i]],
4113                    width>>shift);
4114     }
4115     s->new_picture = *pict;
4116
4117     s->m.picture_number= avctx->frame_number;
4118     if(avctx->flags&CODEC_FLAG_PASS2){
4119         s->m.pict_type =
4120         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4121         s->keyframe= pict->pict_type==FF_I_TYPE;
4122         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4123             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4124             if (pict->quality < 0)
4125                 return -1;
4126         }
4127     }else{
4128         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4129         s->m.pict_type=
4130         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4131     }
4132
4133     if(s->pass1_rc && avctx->frame_number == 0)
4134         pict->quality= 2*FF_QP2LAMBDA;
4135     if(pict->quality){
4136         s->qlog= qscale2qlog(pict->quality);
4137         s->lambda = pict->quality * 3/2;
4138     }
4139     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4140         s->qlog= LOSSLESS_QLOG;
4141         s->lambda = 0;
4142     }//else keep previous frame's qlog until after motion est
4143
4144     frame_start(s);
4145
4146     s->m.current_picture_ptr= &s->m.current_picture;
4147     if(pict->pict_type == P_TYPE){
4148         int block_width = (width +15)>>4;
4149         int block_height= (height+15)>>4;
4150         int stride= s->current_picture.linesize[0];
4151
4152         assert(s->current_picture.data[0]);
4153         assert(s->last_picture[0].data[0]);
4154
4155         s->m.avctx= s->avctx;
4156         s->m.current_picture.data[0]= s->current_picture.data[0];
4157         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
4158         s->m.    new_picture.data[0]= s->  input_picture.data[0];
4159         s->m.   last_picture_ptr= &s->m.   last_picture;
4160         s->m.linesize=
4161         s->m.   last_picture.linesize[0]=
4162         s->m.    new_picture.linesize[0]=
4163         s->m.current_picture.linesize[0]= stride;
4164         s->m.uvlinesize= s->current_picture.linesize[1];
4165         s->m.width = width;
4166         s->m.height= height;
4167         s->m.mb_width = block_width;
4168         s->m.mb_height= block_height;
4169         s->m.mb_stride=   s->m.mb_width+1;
4170         s->m.b8_stride= 2*s->m.mb_width+1;
4171         s->m.f_code=1;
4172         s->m.pict_type= pict->pict_type;
4173         s->m.me_method= s->avctx->me_method;
4174         s->m.me.scene_change_score=0;
4175         s->m.flags= s->avctx->flags;
4176         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4177         s->m.out_format= FMT_H263;
4178         s->m.unrestricted_mv= 1;
4179
4180         s->m.lambda = s->lambda;
4181         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4182         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4183
4184         s->m.dsp= s->dsp; //move
4185         ff_init_me(&s->m);
4186         s->dsp= s->m.dsp;
4187     }
4188
4189     if(s->pass1_rc){
4190         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4191         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4192     }
4193
4194 redo_frame:
4195
4196     s->m.pict_type = pict->pict_type;
4197     s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4198
4199     encode_header(s);
4200     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4201     encode_blocks(s, 1);
4202     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4203
4204     for(plane_index=0; plane_index<3; plane_index++){
4205         Plane *p= &s->plane[plane_index];
4206         int w= p->width;
4207         int h= p->height;
4208         int x, y;
4209 //        int bits= put_bits_count(&s->c.pb);
4210
4211     if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4212         //FIXME optimize
4213      if(pict->data[plane_index]) //FIXME gray hack
4214         for(y=0; y<h; y++){
4215             for(x=0; x<w; x++){
4216                 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4217             }
4218         }
4219         predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4220
4221         if(   plane_index==0
4222            && pict->pict_type == P_TYPE
4223            && !(avctx->flags&CODEC_FLAG_PASS2)
4224            && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4225             ff_init_range_encoder(c, buf, buf_size);
4226             ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4227             pict->pict_type= FF_I_TYPE;
4228             s->keyframe=1;
4229             s->current_picture.key_frame=1;
4230             goto redo_frame;
4231         }
4232
4233         if(s->qlog == LOSSLESS_QLOG){
4234             for(y=0; y<h; y++){
4235                 for(x=0; x<w; x++){
4236                     s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4237                 }
4238             }
4239         }else{
4240             for(y=0; y<h; y++){
4241                 for(x=0; x<w; x++){
4242                     s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4243                 }
4244             }
4245         }
4246
4247         ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4248
4249         if(s->pass1_rc && plane_index==0){
4250             int delta_qlog = ratecontrol_1pass(s, pict);
4251             if (delta_qlog <= INT_MIN)
4252                 return -1;
4253             if(delta_qlog){
4254                 //reordering qlog in the bitstream would eliminate this reset
4255                 ff_init_range_encoder(c, buf, buf_size);
4256                 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4257                 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4258                 encode_header(s);
4259                 encode_blocks(s, 0);
4260             }
4261         }
4262
4263         for(level=0; level<s->spatial_decomposition_count; level++){
4264             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4265                 SubBand *b= &p->band[level][orientation];
4266
4267                 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4268                 if(orientation==0)
4269                     decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4270                 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4271                 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4272                 if(orientation==0)
4273                     correlate(s, b, b->ibuf, b->stride, 1, 0);
4274             }
4275         }
4276 //        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4277
4278         for(level=0; level<s->spatial_decomposition_count; level++){
4279             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4280                 SubBand *b= &p->band[level][orientation];
4281
4282                 dequantize(s, b, b->ibuf, b->stride);
4283             }
4284         }
4285
4286         ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4287         if(s->qlog == LOSSLESS_QLOG){
4288             for(y=0; y<h; y++){
4289                 for(x=0; x<w; x++){
4290                     s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4291                 }
4292             }
4293         }
4294 {START_TIMER
4295         predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4296 STOP_TIMER("pred-conv")}
4297       }else{
4298             //ME/MC only
4299             if(pict->pict_type == I_TYPE){
4300                 for(y=0; y<h; y++){
4301                     for(x=0; x<w; x++){
4302                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4303                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
4304                     }
4305                 }
4306             }else{
4307                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4308                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4309             }
4310       }
4311         if(s->avctx->flags&CODEC_FLAG_PSNR){
4312             int64_t error= 0;
4313
4314     if(pict->data[plane_index]) //FIXME gray hack
4315             for(y=0; y<h; y++){
4316                 for(x=0; x<w; x++){
4317                     int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4318                     error += d*d;
4319                 }
4320             }
4321             s->avctx->error[plane_index] += error;
4322             s->current_picture.error[plane_index] = error;
4323         }
4324     }
4325
4326     if(s->last_picture[s->max_ref_frames-1].data[0]){
4327         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4328         for(i=0; i<9; i++)
4329             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4330                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4331     }
4332
4333     s->current_picture.coded_picture_number = avctx->frame_number;
4334     s->current_picture.pict_type = pict->pict_type;
4335     s->current_picture.quality = pict->quality;
4336     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4337     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4338     s->m.current_picture.display_picture_number =
4339     s->m.current_picture.coded_picture_number = avctx->frame_number;
4340     s->m.current_picture.quality = pict->quality;
4341     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4342     if(s->pass1_rc)
4343         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4344             return -1;
4345     if(avctx->flags&CODEC_FLAG_PASS1)
4346         ff_write_pass1_stats(&s->m);
4347     s->m.last_pict_type = s->m.pict_type;
4348     avctx->frame_bits = s->m.frame_bits;
4349     avctx->mv_bits = s->m.mv_bits;
4350     avctx->misc_bits = s->m.misc_bits;
4351     avctx->p_tex_bits = s->m.p_tex_bits;
4352
4353     emms_c();
4354
4355     return ff_rac_terminate(c);
4356 }
4357
4358 static void common_end(SnowContext *s){
4359     int plane_index, level, orientation, i;
4360
4361     av_freep(&s->spatial_dwt_buffer);
4362     av_freep(&s->spatial_idwt_buffer);
4363
4364     av_freep(&s->m.me.scratchpad);
4365     av_freep(&s->m.me.map);
4366     av_freep(&s->m.me.score_map);
4367     av_freep(&s->m.obmc_scratchpad);
4368
4369     av_freep(&s->block);
4370
4371     for(i=0; i<MAX_REF_FRAMES; i++){
4372         av_freep(&s->ref_mvs[i]);
4373         av_freep(&s->ref_scores[i]);
4374         if(s->last_picture[i].data[0])
4375             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4376     }
4377
4378     for(plane_index=0; plane_index<3; plane_index++){
4379         for(level=s->spatial_decomposition_count-1; level>=0; level--){
4380             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4381                 SubBand *b= &s->plane[plane_index].band[level][orientation];
4382
4383                 av_freep(&b->x_coeff);
4384             }
4385         }
4386     }
4387 }
4388
4389 static int encode_end(AVCodecContext *avctx)
4390 {
4391     SnowContext *s = avctx->priv_data;
4392
4393     common_end(s);
4394     av_free(avctx->stats_out);
4395
4396     return 0;
4397 }
4398
4399 static int decode_init(AVCodecContext *avctx)
4400 {
4401     SnowContext *s = avctx->priv_data;
4402
4403     avctx->pix_fmt= PIX_FMT_YUV420P;
4404
4405     common_init(avctx);
4406
4407     return 0;
4408 }
4409
4410 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4411     SnowContext *s = avctx->priv_data;
4412     RangeCoder * const c= &s->c;
4413     int bytes_read;
4414     AVFrame *picture = data;
4415     int level, orientation, plane_index, i;
4416
4417     ff_init_range_decoder(c, buf, buf_size);
4418     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4419
4420     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4421     decode_header(s);
4422     common_init_after_header(avctx);
4423
4424     // realloc slice buffer for the case that spatial_decomposition_count changed
4425     slice_buffer_destroy(&s->sb);
4426     slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4427
4428     for(plane_index=0; plane_index<3; plane_index++){
4429         Plane *p= &s->plane[plane_index];
4430         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4431                                               && p->hcoeff[1]==-10
4432                                               && p->hcoeff[2]==2;
4433     }
4434
4435     if(!s->block) alloc_blocks(s);
4436
4437     frame_start(s);
4438     //keyframe flag dupliaction mess FIXME
4439     if(avctx->debug&FF_DEBUG_PICT_INFO)
4440         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4441
4442     decode_blocks(s);
4443
4444     for(plane_index=0; plane_index<3; plane_index++){
4445         Plane *p= &s->plane[plane_index];
4446         int w= p->width;
4447         int h= p->height;
4448         int x, y;
4449         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4450
4451 if(s->avctx->debug&2048){
4452         memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4453         predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4454
4455         for(y=0; y<h; y++){
4456             for(x=0; x<w; x++){
4457                 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4458                 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4459             }
4460         }
4461 }
4462
4463 {   START_TIMER
4464     for(level=0; level<s->spatial_decomposition_count; level++){
4465         for(orientation=level ? 1 : 0; orientation<4; orientation++){
4466             SubBand *b= &p->band[level][orientation];
4467             unpack_coeffs(s, b, b->parent, orientation);
4468         }
4469     }
4470     STOP_TIMER("unpack coeffs");
4471 }
4472
4473 {START_TIMER
4474     const int mb_h= s->b_height << s->block_max_depth;
4475     const int block_size = MB_SIZE >> s->block_max_depth;
4476     const int block_w    = plane_index ? block_size/2 : block_size;
4477     int mb_y;
4478     dwt_compose_t cs[MAX_DECOMPOSITIONS];
4479     int yd=0, yq=0;
4480     int y;
4481     int end_y;
4482
4483     ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4484     for(mb_y=0; mb_y<=mb_h; mb_y++){
4485
4486         int slice_starty = block_w*mb_y;
4487         int slice_h = block_w*(mb_y+1);
4488         if (!(s->keyframe || s->avctx->debug&512)){
4489             slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4490             slice_h -= (block_w >> 1);
4491         }
4492
4493         {
4494         START_TIMER
4495         for(level=0; level<s->spatial_decomposition_count; level++){
4496             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4497                 SubBand *b= &p->band[level][orientation];
4498                 int start_y;
4499                 int end_y;
4500                 int our_mb_start = mb_y;
4501                 int our_mb_end = (mb_y + 1);
4502                 const int extra= 3;
4503                 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4504                 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4505                 if (!(s->keyframe || s->avctx->debug&512)){
4506                     start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4507                     end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4508                 }
4509                 start_y = FFMIN(b->height, start_y);
4510                 end_y = FFMIN(b->height, end_y);
4511
4512                 if (start_y != end_y){
4513                     if (orientation == 0){
4514                         SubBand * correlate_band = &p->band[0][0];
4515                         int correlate_end_y = FFMIN(b->height, end_y + 1);
4516                         int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4517                         decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4518                         correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4519                         dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4520                     }
4521                     else
4522                         decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4523                 }
4524             }
4525         }
4526         STOP_TIMER("decode_subband_slice");
4527         }
4528
4529 {   START_TIMER
4530         for(; yd<slice_h; yd+=4){
4531             ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4532         }
4533     STOP_TIMER("idwt slice");}
4534
4535
4536         if(s->qlog == LOSSLESS_QLOG){
4537             for(; yq<slice_h && yq<h; yq++){
4538                 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4539                 for(x=0; x<w; x++){
4540                     line[x] <<= FRAC_BITS;
4541                 }
4542             }
4543         }
4544
4545         predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4546
4547         y = FFMIN(p->height, slice_starty);
4548         end_y = FFMIN(p->height, slice_h);
4549         while(y < end_y)
4550             slice_buffer_release(&s->sb, y++);
4551     }
4552
4553     slice_buffer_flush(&s->sb);
4554
4555 STOP_TIMER("idwt + predict_slices")}
4556     }
4557
4558     emms_c();
4559
4560     if(s->last_picture[s->max_ref_frames-1].data[0]){
4561         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4562         for(i=0; i<9; i++)
4563             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4564                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4565     }
4566
4567 if(!(s->avctx->debug&2048))
4568     *picture= s->current_picture;
4569 else
4570     *picture= s->mconly_picture;
4571
4572     *data_size = sizeof(AVFrame);
4573
4574     bytes_read= c->bytestream - c->bytestream_start;
4575     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4576
4577     return bytes_read;
4578 }
4579
4580 static int decode_end(AVCodecContext *avctx)
4581 {
4582     SnowContext *s = avctx->priv_data;
4583
4584     slice_buffer_destroy(&s->sb);
4585
4586     common_end(s);
4587
4588     return 0;
4589 }
4590
4591 AVCodec snow_decoder = {
4592     "snow",
4593     CODEC_TYPE_VIDEO,
4594     CODEC_ID_SNOW,
4595     sizeof(SnowContext),
4596     decode_init,
4597     NULL,
4598     decode_end,
4599     decode_frame,
4600     0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4601     NULL
4602 };
4603
4604 #ifdef CONFIG_SNOW_ENCODER
4605 AVCodec snow_encoder = {
4606     "snow",
4607     CODEC_TYPE_VIDEO,
4608     CODEC_ID_SNOW,
4609     sizeof(SnowContext),
4610     encode_init,
4611     encode_frame,
4612     encode_end,
4613 };
4614 #endif
4615
4616
4617 #if 0
4618 #undef malloc
4619 #undef free
4620 #undef printf
4621 #undef random
4622
4623 int main(){
4624     int width=256;
4625     int height=256;
4626     int buffer[2][width*height];
4627     SnowContext s;
4628     int i;
4629     s.spatial_decomposition_count=6;
4630     s.spatial_decomposition_type=1;
4631
4632     printf("testing 5/3 DWT\n");
4633     for(i=0; i<width*height; i++)
4634         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4635
4636     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4637     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4638
4639     for(i=0; i<width*height; i++)
4640         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4641
4642     printf("testing 9/7 DWT\n");
4643     s.spatial_decomposition_type=0;
4644     for(i=0; i<width*height; i++)
4645         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4646
4647     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4648     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4649
4650     for(i=0; i<width*height; i++)
4651         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4652
4653 #if 0
4654     printf("testing AC coder\n");
4655     memset(s.header_state, 0, sizeof(s.header_state));
4656     ff_init_range_encoder(&s.c, buffer[0], 256*256);
4657     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4658
4659     for(i=-256; i<256; i++){
4660 START_TIMER
4661         put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4662 STOP_TIMER("put_symbol")
4663     }
4664     ff_rac_terminate(&s.c);
4665
4666     memset(s.header_state, 0, sizeof(s.header_state));
4667     ff_init_range_decoder(&s.c, buffer[0], 256*256);
4668     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4669
4670     for(i=-256; i<256; i++){
4671         int j;
4672 START_TIMER
4673         j= get_symbol(&s.c, s.header_state, 1);
4674 STOP_TIMER("get_symbol")
4675         if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4676     }
4677 #endif
4678 {
4679 int level, orientation, x, y;
4680 int64_t errors[8][4];
4681 int64_t g=0;
4682
4683     memset(errors, 0, sizeof(errors));
4684     s.spatial_decomposition_count=3;
4685     s.spatial_decomposition_type=0;
4686     for(level=0; level<s.spatial_decomposition_count; level++){
4687         for(orientation=level ? 1 : 0; orientation<4; orientation++){
4688             int w= width  >> (s.spatial_decomposition_count-level);
4689             int h= height >> (s.spatial_decomposition_count-level);
4690             int stride= width  << (s.spatial_decomposition_count-level);
4691             DWTELEM *buf= buffer[0];
4692             int64_t error=0;
4693
4694             if(orientation&1) buf+=w;
4695             if(orientation>1) buf+=stride>>1;
4696
4697             memset(buffer[0], 0, sizeof(int)*width*height);
4698             buf[w/2 + h/2*stride]= 256*256;
4699             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4700             for(y=0; y<height; y++){
4701                 for(x=0; x<width; x++){
4702                     int64_t d= buffer[0][x + y*width];
4703                     error += d*d;
4704                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4705                 }
4706                 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4707             }
4708             error= (int)(sqrt(error)+0.5);
4709             errors[level][orientation]= error;
4710             if(g) g=ff_gcd(g, error);
4711             else g= error;
4712         }
4713     }
4714     printf("static int const visual_weight[][4]={\n");
4715     for(level=0; level<s.spatial_decomposition_count; level++){
4716         printf("  {");
4717         for(orientation=0; orientation<4; orientation++){
4718             printf("%8"PRId64",", errors[level][orientation]/g);
4719         }
4720         printf("},\n");
4721     }
4722     printf("};\n");
4723     {
4724             int level=2;
4725             int orientation=3;
4726             int w= width  >> (s.spatial_decomposition_count-level);
4727             int h= height >> (s.spatial_decomposition_count-level);
4728             int stride= width  << (s.spatial_decomposition_count-level);
4729             DWTELEM *buf= buffer[0];
4730             int64_t error=0;
4731
4732             buf+=w;
4733             buf+=stride>>1;
4734
4735             memset(buffer[0], 0, sizeof(int)*width*height);
4736 #if 1
4737             for(y=0; y<height; y++){
4738                 for(x=0; x<width; x++){
4739                     int tab[4]={0,2,3,1};
4740                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4741                 }
4742             }
4743             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744 #else
4745             for(y=0; y<h; y++){
4746                 for(x=0; x<w; x++){
4747                     buf[x + y*stride  ]=169;
4748                     buf[x + y*stride-w]=64;
4749                 }
4750             }
4751             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4752 #endif
4753             for(y=0; y<height; y++){
4754                 for(x=0; x<width; x++){
4755                     int64_t d= buffer[0][x + y*width];
4756                     error += d*d;
4757                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4758                 }
4759                 if(FFABS(height/2-y)<9) printf("\n");
4760             }
4761     }
4762
4763 }
4764     return 0;
4765 }
4766 #endif
4767