libavcodec/snow.c

   1 /*
   2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "avcodec.h"
  22 #include "dsputil.h"
  23 #include "snow.h"
  24
  25 #include "rangecoder.h"
  26 #include "mathops.h"
  27
  28 #include "mpegvideo.h"
  29
  30 #undef NDEBUG
  31 #include <assert.h>
  32
  33 static const int8_t quant3[256]={
  34  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  40  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  41  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
  50 };
  51 static const int8_t quant3b[256]={
  52  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  53  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  54  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  55  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  57  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  58  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  59  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  68 };
  69 static const int8_t quant3bA[256]={
  70  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  71  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  72  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  73  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  74  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  75  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  76  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  77  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  78  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  79  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  80  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  81  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  82  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  83  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  84  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  85  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  86 };
  87 static const int8_t quant5[256]={
  88  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  89  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  90  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  91  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  92  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  93  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  94  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  95  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
 104 };
 105 static const int8_t quant7[256]={
 106  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 107  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 108  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 113  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
 120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
 122 };
 123 static const int8_t quant9[256]={
 124  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 125  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 131  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
 139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
 140 };
 141 static const int8_t quant11[256]={
 142  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 143  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 144  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 149  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
 156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
 158 };
 159 static const int8_t quant13[256]={
 160  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 161  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 162  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 163  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 167  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
 173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
 176 };
 177
 178 #if 0 //64*cubic
 179 static const uint8_t obmc32[1024]={
 180   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 181   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 182   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 183   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 184   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 185   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 186   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 187   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 188   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 189   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 190   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 191   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 192   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 193   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 194   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 196   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 197   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 198   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 199   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 200   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 201   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 202   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 203   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 204   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 205   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 206   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 207   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 208   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 209   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 210   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 211   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 212 //error:0.000022
 213 };
 214 static const uint8_t obmc16[256]={
 215   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 216   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 217   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 218   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 219   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 220   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 221   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 223   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 224   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 225   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 226   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 227   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 228   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 229   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 230   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 231 //error:0.000033
 232 };
 233 #elif 1 // 64*linear
 234 static const uint8_t obmc32[1024]={
 235   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 236   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 237   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 238   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 239   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 240   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 241   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 242   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 243   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 244   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 245   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 246   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 247   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 248   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 249   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 251   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 252   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 253   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 254   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 255   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 256   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 257   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 258   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 259   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 260   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 261   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 262   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 263   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 264   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 265   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 266   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 267  //error:0.000020
 268 };
 269 static const uint8_t obmc16[256]={
 270   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 271   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 272   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 273   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 274   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 275  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 276  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 278  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 279  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 280  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 281   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 282   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 283   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 284   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 285   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 286 //error:0.000015
 287 };
 288 #else //64*cos
 289 static const uint8_t obmc32[1024]={
 290   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 291   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 292   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 293   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 294   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 295   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 296   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 297   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 298   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 299   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 300   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 301   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 302   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 303   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 304   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 306   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 307   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 308   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 309   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 310   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 311   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 312   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 313   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 314   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 315   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 316   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 317   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 318   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 319   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 320   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 321   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 322 //error:0.000022
 323 };
 324 static const uint8_t obmc16[256]={
 325   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 326   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 327   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 328   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 329   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 330   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 331   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 333   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 334   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 335   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 336   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 337   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 338   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 339   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 340   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 341 //error:0.000022
 342 };
 343 #endif /* 0 */
 344
 345 //linear *64
 346 static const uint8_t obmc8[64]={
 347   4, 12, 20, 28, 28, 20, 12,  4,
 348  12, 36, 60, 84, 84, 60, 36, 12,
 349  20, 60,100,140,140,100, 60, 20,
 350  28, 84,140,196,196,140, 84, 28,
 351  28, 84,140,196,196,140, 84, 28,
 352  20, 60,100,140,140,100, 60, 20,
 353  12, 36, 60, 84, 84, 60, 36, 12,
 354   4, 12, 20, 28, 28, 20, 12,  4,
 355 //error:0.000000
 356 };
 357
 358 //linear *64
 359 static const uint8_t obmc4[16]={
 360  16, 48, 48, 16,
 361  48,144,144, 48,
 362  48,144,144, 48,
 363  16, 48, 48, 16,
 364 //error:0.000000
 365 };
 366
 367 static const uint8_t * const obmc_tab[4]={
 368     obmc32, obmc16, obmc8, obmc4
 369 };
 370
 371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
 372
 373 typedef struct BlockNode{
 374     int16_t mx;
 375     int16_t my;
 376     uint8_t ref;
 377     uint8_t color[3];
 378     uint8_t type;
 379 //#define TYPE_SPLIT    1
 380 #define BLOCK_INTRA   1
 381 #define BLOCK_OPT     2
 382 //#define TYPE_NOCOLOR  4
 383     uint8_t level; //FIXME merge into type?
 384 }BlockNode;
 385
 386 static const BlockNode null_block= { //FIXME add border maybe
 387     .color= {128,128,128},
 388     .mx= 0,
 389     .my= 0,
 390     .ref= 0,
 391     .type= 0,
 392     .level= 0,
 393 };
 394
 395 #define LOG2_MB_SIZE 4
 396 #define MB_SIZE (1<<LOG2_MB_SIZE)
 397 #define ENCODER_EXTRA_BITS 4
 398 #define HTAPS_MAX 8
 399
 400 typedef struct x_and_coeff{
 401     int16_t x;
 402     uint16_t coeff;
 403 } x_and_coeff;
 404
 405 typedef struct SubBand{
 406     int level;
 407     int stride;
 408     int width;
 409     int height;
 410     int qlog;        ///< log(qscale)/log[2^(1/6)]
 411     DWTELEM *buf;
 412     IDWTELEM *ibuf;
 413     int buf_x_offset;
 414     int buf_y_offset;
 415     int stride_line; ///< Stride measured in lines, not pixels.
 416     x_and_coeff * x_coeff;
 417     struct SubBand *parent;
 418     uint8_t state[/*7*2*/ 7 + 512][32];
 419 }SubBand;
 420
 421 typedef struct Plane{
 422     int width;
 423     int height;
 424     SubBand band[MAX_DECOMPOSITIONS][4];
 425
 426     int htaps;
 427     int8_t hcoeff[HTAPS_MAX/2];
 428     int diag_mc;
 429     int fast_mc;
 430
 431     int last_htaps;
 432     int8_t last_hcoeff[HTAPS_MAX/2];
 433     int last_diag_mc;
 434 }Plane;
 435
 436 typedef struct SnowContext{
 437 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 438
 439     AVCodecContext *avctx;
 440     RangeCoder c;
 441     DSPContext dsp;
 442     AVFrame new_picture;
 443     AVFrame input_picture;              ///< new_picture with the internal linesizes
 444     AVFrame current_picture;
 445     AVFrame last_picture[MAX_REF_FRAMES];
 446     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
 447     AVFrame mconly_picture;
 448 //     uint8_t q_context[16];
 449     uint8_t header_state[32];
 450     uint8_t block_state[128 + 32*128];
 451     int keyframe;
 452     int always_reset;
 453     int version;
 454     int spatial_decomposition_type;
 455     int last_spatial_decomposition_type;
 456     int temporal_decomposition_type;
 457     int spatial_decomposition_count;
 458     int last_spatial_decomposition_count;
 459     int temporal_decomposition_count;
 460     int max_ref_frames;
 461     int ref_frames;
 462     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
 463     uint32_t *ref_scores[MAX_REF_FRAMES];
 464     DWTELEM *spatial_dwt_buffer;
 465     IDWTELEM *spatial_idwt_buffer;
 466     int colorspace_type;
 467     int chroma_h_shift;
 468     int chroma_v_shift;
 469     int spatial_scalability;
 470     int qlog;
 471     int last_qlog;
 472     int lambda;
 473     int lambda2;
 474     int pass1_rc;
 475     int mv_scale;
 476     int last_mv_scale;
 477     int qbias;
 478     int last_qbias;
 479 #define QBIAS_SHIFT 3
 480     int b_width;
 481     int b_height;
 482     int block_max_depth;
 483     int last_block_max_depth;
 484     Plane plane[MAX_PLANES];
 485     BlockNode *block;
 486 #define ME_CACHE_SIZE 1024
 487     int me_cache[ME_CACHE_SIZE];
 488     int me_cache_generation;
 489     slice_buffer sb;
 490
 491     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 492
 493     uint8_t *scratchbuf;
 494 }SnowContext;
 495
 496 typedef struct {
 497     IDWTELEM *b0;
 498     IDWTELEM *b1;
 499     IDWTELEM *b2;
 500     IDWTELEM *b3;
 501     int y;
 502 } DWTCompose;
 503
 504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 506
 507 static void iterative_me(SnowContext *s);
 508
 509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
 510 {
 511     int i;
 512
 513     buf->base_buffer = base_buffer;
 514     buf->line_count = line_count;
 515     buf->line_width = line_width;
 516     buf->data_count = max_allocated_lines;
 517     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
 518     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
 519
 520     for(i = 0; i < max_allocated_lines; i++){
 521         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
 522     }
 523
 524     buf->data_stack_top = max_allocated_lines - 1;
 525 }
 526
 527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
 528 {
 529     int offset;
 530     IDWTELEM * buffer;
 531
 532     assert(buf->data_stack_top >= 0);
 533 //  assert(!buf->line[line]);
 534     if (buf->line[line])
 535         return buf->line[line];
 536
 537     offset = buf->line_width * line;
 538     buffer = buf->data_stack[buf->data_stack_top];
 539     buf->data_stack_top--;
 540     buf->line[line] = buffer;
 541
 542     return buffer;
 543 }
 544
 545 static void slice_buffer_release(slice_buffer * buf, int line)
 546 {
 547     int offset;
 548     IDWTELEM * buffer;
 549
 550     assert(line >= 0 && line < buf->line_count);
 551     assert(buf->line[line]);
 552
 553     offset = buf->line_width * line;
 554     buffer = buf->line[line];
 555     buf->data_stack_top++;
 556     buf->data_stack[buf->data_stack_top] = buffer;
 557     buf->line[line] = NULL;
 558 }
 559
 560 static void slice_buffer_flush(slice_buffer * buf)
 561 {
 562     int i;
 563     for(i = 0; i < buf->line_count; i++){
 564         if (buf->line[i])
 565             slice_buffer_release(buf, i);
 566     }
 567 }
 568
 569 static void slice_buffer_destroy(slice_buffer * buf)
 570 {
 571     int i;
 572     slice_buffer_flush(buf);
 573
 574     for(i = buf->data_count - 1; i >= 0; i--){
 575         av_freep(&buf->data_stack[i]);
 576     }
 577     av_freep(&buf->data_stack);
 578     av_freep(&buf->line);
 579 }
 580
 581 #ifdef __sgi
 582 // Avoid a name clash on SGI IRIX
 583 #undef qexp
 584 #endif
 585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
 586 static uint8_t qexp[QROOT];
 587
 588 static inline int mirror(int v, int m){
 589     while((unsigned)v > (unsigned)m){
 590         v=-v;
 591         if(v<0) v+= 2*m;
 592     }
 593     return v;
 594 }
 595
 596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
 597     int i;
 598
 599     if(v){
 600         const int a= FFABS(v);
 601         const int e= av_log2(a);
 602 #if 1
 603         const int el= FFMIN(e, 10);
 604         put_rac(c, state+0, 0);
 605
 606         for(i=0; i<el; i++){
 607             put_rac(c, state+1+i, 1);  //1..10
 608         }
 609         for(; i<e; i++){
 610             put_rac(c, state+1+9, 1);  //1..10
 611         }
 612         put_rac(c, state+1+FFMIN(i,9), 0);
 613
 614         for(i=e-1; i>=el; i--){
 615             put_rac(c, state+22+9, (a>>i)&1); //22..31
 616         }
 617         for(; i>=0; i--){
 618             put_rac(c, state+22+i, (a>>i)&1); //22..31
 619         }
 620
 621         if(is_signed)
 622             put_rac(c, state+11 + el, v < 0); //11..21
 623 #else
 624
 625         put_rac(c, state+0, 0);
 626         if(e<=9){
 627             for(i=0; i<e; i++){
 628                 put_rac(c, state+1+i, 1);  //1..10
 629             }
 630             put_rac(c, state+1+i, 0);
 631
 632             for(i=e-1; i>=0; i--){
 633                 put_rac(c, state+22+i, (a>>i)&1); //22..31
 634             }
 635
 636             if(is_signed)
 637                 put_rac(c, state+11 + e, v < 0); //11..21
 638         }else{
 639             for(i=0; i<e; i++){
 640                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
 641             }
 642             put_rac(c, state+1+FFMIN(i,9), 0);
 643
 644             for(i=e-1; i>=0; i--){
 645                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
 646             }
 647
 648             if(is_signed)
 649                 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
 650         }
 651 #endif /* 1 */
 652     }else{
 653         put_rac(c, state+0, 1);
 654     }
 655 }
 656
 657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
 658     if(get_rac(c, state+0))
 659         return 0;
 660     else{
 661         int i, e, a;
 662         e= 0;
 663         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
 664             e++;
 665         }
 666
 667         a= 1;
 668         for(i=e-1; i>=0; i--){
 669             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
 670         }
 671
 672         if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
 673             return -a;
 674         else
 675             return a;
 676     }
 677 }
 678
 679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
 680     int i;
 681     int r= log2>=0 ? 1<<log2 : 1;
 682
 683     assert(v>=0);
 684     assert(log2>=-4);
 685
 686     while(v >= r){
 687         put_rac(c, state+4+log2, 1);
 688         v -= r;
 689         log2++;
 690         if(log2>0) r+=r;
 691     }
 692     put_rac(c, state+4+log2, 0);
 693
 694     for(i=log2-1; i>=0; i--){
 695         put_rac(c, state+31-i, (v>>i)&1);
 696     }
 697 }
 698
 699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
 700     int i;
 701     int r= log2>=0 ? 1<<log2 : 1;
 702     int v=0;
 703
 704     assert(log2>=-4);
 705
 706     while(get_rac(c, state+4+log2)){
 707         v+= r;
 708         log2++;
 709         if(log2>0) r+=r;
 710     }
 711
 712     for(i=log2-1; i>=0; i--){
 713         v+= get_rac(c, state+31-i)<<i;
 714     }
 715
 716     return v;
 717 }
 718
 719 static av_always_inline void
 720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 721      int dst_step, int src_step, int ref_step,
 722      int width, int mul, int add, int shift,
 723      int highpass, int inverse){
 724     const int mirror_left= !highpass;
 725     const int mirror_right= (width&1) ^ highpass;
 726     const int w= (width>>1) - 1 + (highpass & width);
 727     int i;
 728
 729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 730     if(mirror_left){
 731         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 732         dst += dst_step;
 733         src += src_step;
 734     }
 735
 736     for(i=0; i<w; i++){
 737         dst[i*dst_step] =
 738             LIFT(src[i*src_step],
 739                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 740                  inverse);
 741     }
 742
 743     if(mirror_right){
 744         dst[w*dst_step] =
 745             LIFT(src[w*src_step],
 746                  ((mul*2*ref[w*ref_step]+add)>>shift),
 747                  inverse);
 748     }
 749 }
 750
 751 static av_always_inline void
 752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 753          int dst_step, int src_step, int ref_step,
 754          int width, int mul, int add, int shift,
 755          int highpass, int inverse){
 756     const int mirror_left= !highpass;
 757     const int mirror_right= (width&1) ^ highpass;
 758     const int w= (width>>1) - 1 + (highpass & width);
 759     int i;
 760
 761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 762     if(mirror_left){
 763         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 764         dst += dst_step;
 765         src += src_step;
 766     }
 767
 768     for(i=0; i<w; i++){
 769         dst[i*dst_step] =
 770             LIFT(src[i*src_step],
 771                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 772                  inverse);
 773     }
 774
 775     if(mirror_right){
 776         dst[w*dst_step] =
 777             LIFT(src[w*src_step],
 778                  ((mul*2*ref[w*ref_step]+add)>>shift),
 779                  inverse);
 780     }
 781 }
 782
 783 #ifndef liftS
 784 static av_always_inline void
 785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 786       int dst_step, int src_step, int ref_step,
 787       int width, int mul, int add, int shift,
 788       int highpass, int inverse){
 789     const int mirror_left= !highpass;
 790     const int mirror_right= (width&1) ^ highpass;
 791     const int w= (width>>1) - 1 + (highpass & width);
 792     int i;
 793
 794     assert(shift == 4);
 795 #define LIFTS(src, ref, inv) \
 796         ((inv) ? \
 797             (src) + (((ref) + 4*(src))>>shift): \
 798             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 799     if(mirror_left){
 800         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 801         dst += dst_step;
 802         src += src_step;
 803     }
 804
 805     for(i=0; i<w; i++){
 806         dst[i*dst_step] =
 807             LIFTS(src[i*src_step],
 808                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 809                   inverse);
 810     }
 811
 812     if(mirror_right){
 813         dst[w*dst_step] =
 814             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 815     }
 816 }
 817 static av_always_inline void
 818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 819           int dst_step, int src_step, int ref_step,
 820           int width, int mul, int add, int shift,
 821           int highpass, int inverse){
 822     const int mirror_left= !highpass;
 823     const int mirror_right= (width&1) ^ highpass;
 824     const int w= (width>>1) - 1 + (highpass & width);
 825     int i;
 826
 827     assert(shift == 4);
 828 #define LIFTS(src, ref, inv) \
 829     ((inv) ? \
 830         (src) + (((ref) + 4*(src))>>shift): \
 831         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 832     if(mirror_left){
 833         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 834         dst += dst_step;
 835         src += src_step;
 836     }
 837
 838     for(i=0; i<w; i++){
 839         dst[i*dst_step] =
 840             LIFTS(src[i*src_step],
 841                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 842                   inverse);
 843     }
 844
 845     if(mirror_right){
 846         dst[w*dst_step] =
 847             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 848     }
 849 }
 850 #endif /* ! liftS */
 851
 852 static void horizontal_decompose53i(DWTELEM *b, int width){
 853     DWTELEM temp[width];
 854     const int width2= width>>1;
 855     int x;
 856     const int w2= (width+1)>>1;
 857
 858     for(x=0; x<width2; x++){
 859         temp[x   ]= b[2*x    ];
 860         temp[x+w2]= b[2*x + 1];
 861     }
 862     if(width&1)
 863         temp[x   ]= b[2*x    ];
 864 #if 0
 865     {
 866     int A1,A2,A3,A4;
 867     A2= temp[1       ];
 868     A4= temp[0       ];
 869     A1= temp[0+width2];
 870     A1 -= (A2 + A4)>>1;
 871     A4 += (A1 + 1)>>1;
 872     b[0+width2] = A1;
 873     b[0       ] = A4;
 874     for(x=1; x+1<width2; x+=2){
 875         A3= temp[x+width2];
 876         A4= temp[x+1     ];
 877         A3 -= (A2 + A4)>>1;
 878         A2 += (A1 + A3 + 2)>>2;
 879         b[x+width2] = A3;
 880         b[x       ] = A2;
 881
 882         A1= temp[x+1+width2];
 883         A2= temp[x+2       ];
 884         A1 -= (A2 + A4)>>1;
 885         A4 += (A1 + A3 + 2)>>2;
 886         b[x+1+width2] = A1;
 887         b[x+1       ] = A4;
 888     }
 889     A3= temp[width-1];
 890     A3 -= A2;
 891     A2 += (A1 + A3 + 2)>>2;
 892     b[width -1] = A3;
 893     b[width2-1] = A2;
 894     }
 895 #else
 896     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
 897     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
 898 #endif /* 0 */
 899 }
 900
 901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 902     int i;
 903
 904     for(i=0; i<width; i++){
 905         b1[i] -= (b0[i] + b2[i])>>1;
 906     }
 907 }
 908
 909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 910     int i;
 911
 912     for(i=0; i<width; i++){
 913         b1[i] += (b0[i] + b2[i] + 2)>>2;
 914     }
 915 }
 916
 917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
 918     int y;
 919     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
 920     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
 921
 922     for(y=-2; y<height; y+=2){
 923         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
 924         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 925
 926         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
 927         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
 928
 929         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
 930         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
 931
 932         b0=b2;
 933         b1=b3;
 934     }
 935 }
 936
 937 static void horizontal_decompose97i(DWTELEM *b, int width){
 938     DWTELEM temp[width];
 939     const int w2= (width+1)>>1;
 940
 941     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
 942     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
 943     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
 944     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
 945 }
 946
 947
 948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 949     int i;
 950
 951     for(i=0; i<width; i++){
 952         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
 953     }
 954 }
 955
 956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 957     int i;
 958
 959     for(i=0; i<width; i++){
 960         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
 961     }
 962 }
 963
 964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 965     int i;
 966
 967     for(i=0; i<width; i++){
 968 #ifdef liftS
 969         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
 970 #else
 971         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
 972 #endif
 973     }
 974 }
 975
 976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 977     int i;
 978
 979     for(i=0; i<width; i++){
 980         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
 981     }
 982 }
 983
 984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
 985     int y;
 986     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
 987     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
 988     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
 989     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
 990
 991     for(y=-4; y<height; y+=2){
 992         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
 993         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
 994
 995         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
 996         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
 997
 998         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
 999         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1002
1003         b0=b2;
1004         b1=b3;
1005         b2=b4;
1006         b3=b5;
1007     }
1008 }
1009
1010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1011     int level;
1012
1013     for(level=0; level<decomposition_count; level++){
1014         switch(type){
1015         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1017         }
1018     }
1019 }
1020
1021 static void horizontal_compose53i(IDWTELEM *b, int width){
1022     IDWTELEM temp[width];
1023     const int width2= width>>1;
1024     const int w2= (width+1)>>1;
1025     int x;
1026
1027 #if 0
1028     int A1,A2,A3,A4;
1029     A2= temp[1       ];
1030     A4= temp[0       ];
1031     A1= temp[0+width2];
1032     A1 -= (A2 + A4)>>1;
1033     A4 += (A1 + 1)>>1;
1034     b[0+width2] = A1;
1035     b[0       ] = A4;
1036     for(x=1; x+1<width2; x+=2){
1037         A3= temp[x+width2];
1038         A4= temp[x+1     ];
1039         A3 -= (A2 + A4)>>1;
1040         A2 += (A1 + A3 + 2)>>2;
1041         b[x+width2] = A3;
1042         b[x       ] = A2;
1043
1044         A1= temp[x+1+width2];
1045         A2= temp[x+2       ];
1046         A1 -= (A2 + A4)>>1;
1047         A4 += (A1 + A3 + 2)>>2;
1048         b[x+1+width2] = A1;
1049         b[x+1       ] = A4;
1050     }
1051     A3= temp[width-1];
1052     A3 -= A2;
1053     A2 += (A1 + A3 + 2)>>2;
1054     b[width -1] = A3;
1055     b[width2-1] = A2;
1056 #else
1057     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1058     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1059 #endif /* 0 */
1060     for(x=0; x<width2; x++){
1061         b[2*x    ]= temp[x   ];
1062         b[2*x + 1]= temp[x+w2];
1063     }
1064     if(width&1)
1065         b[2*x    ]= temp[x   ];
1066 }
1067
1068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069     int i;
1070
1071     for(i=0; i<width; i++){
1072         b1[i] += (b0[i] + b2[i])>>1;
1073     }
1074 }
1075
1076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1077     int i;
1078
1079     for(i=0; i<width; i++){
1080         b1[i] -= (b0[i] + b2[i] + 2)>>2;
1081     }
1082 }
1083
1084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1085     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1087     cs->y = -1;
1088 }
1089
1090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1091     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092     cs->b1 = buffer + mirror(-1  , height-1)*stride;
1093     cs->y = -1;
1094 }
1095
1096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1097     int y= cs->y;
1098
1099     IDWTELEM *b0= cs->b0;
1100     IDWTELEM *b1= cs->b1;
1101     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1103
1104         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1106
1107         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1109
1110     cs->b0 = b2;
1111     cs->b1 = b3;
1112     cs->y += 2;
1113 }
1114
1115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1116     int y= cs->y;
1117     IDWTELEM *b0= cs->b0;
1118     IDWTELEM *b1= cs->b1;
1119     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1121
1122         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1124
1125         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1127
1128     cs->b0 = b2;
1129     cs->b1 = b3;
1130     cs->y += 2;
1131 }
1132
1133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1134     DWTCompose cs;
1135     spatial_compose53i_init(&cs, buffer, height, stride);
1136     while(cs.y <= height)
1137         spatial_compose53i_dy(&cs, buffer, width, height, stride);
1138 }
1139
1140
1141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142     IDWTELEM temp[width];
1143     const int w2= (width+1)>>1;
1144
1145     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1146     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1147     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1148     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1149 }
1150
1151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152     int i;
1153
1154     for(i=0; i<width; i++){
1155         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1156     }
1157 }
1158
1159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160     int i;
1161
1162     for(i=0; i<width; i++){
1163         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1164     }
1165 }
1166
1167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1168     int i;
1169
1170     for(i=0; i<width; i++){
1171 #ifdef liftS
1172         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1173 #else
1174         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1175 #endif
1176     }
1177 }
1178
1179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1180     int i;
1181
1182     for(i=0; i<width; i++){
1183         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1184     }
1185 }
1186
1187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1188     int i;
1189
1190     for(i=0; i<width; i++){
1191         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1192         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1193 #ifdef liftS
1194         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1195 #else
1196         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197 #endif
1198         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1199     }
1200 }
1201
1202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1203     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1205     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1207     cs->y = -3;
1208 }
1209
1210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1211     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212     cs->b1 = buffer + mirror(-3  , height-1)*stride;
1213     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1215     cs->y = -3;
1216 }
1217
1218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1219     int y = cs->y;
1220
1221     IDWTELEM *b0= cs->b0;
1222     IDWTELEM *b1= cs->b1;
1223     IDWTELEM *b2= cs->b2;
1224     IDWTELEM *b3= cs->b3;
1225     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1227
1228     if(y>0 && y+4<height){
1229         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1230     }else{
1231         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1235     }
1236
1237         if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238         if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1239
1240     cs->b0=b2;
1241     cs->b1=b3;
1242     cs->b2=b4;
1243     cs->b3=b5;
1244     cs->y += 2;
1245 }
1246
1247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1248     int y = cs->y;
1249     IDWTELEM *b0= cs->b0;
1250     IDWTELEM *b1= cs->b1;
1251     IDWTELEM *b2= cs->b2;
1252     IDWTELEM *b3= cs->b3;
1253     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255
1256         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1260
1261         if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262         if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1263
1264     cs->b0=b2;
1265     cs->b1=b3;
1266     cs->b2=b4;
1267     cs->b3=b5;
1268     cs->y += 2;
1269 }
1270
1271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1272     DWTCompose cs;
1273     spatial_compose97i_init(&cs, buffer, height, stride);
1274     while(cs.y <= height)
1275         spatial_compose97i_dy(&cs, buffer, width, height, stride);
1276 }
1277
1278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1279     int level;
1280     for(level=decomposition_count-1; level>=0; level--){
1281         switch(type){
1282         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1284         }
1285     }
1286 }
1287
1288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1289     int level;
1290     for(level=decomposition_count-1; level>=0; level--){
1291         switch(type){
1292         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1294         }
1295     }
1296 }
1297
1298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1299     const int support = type==1 ? 3 : 5;
1300     int level;
1301     if(type==2) return;
1302
1303     for(level=decomposition_count-1; level>=0; level--){
1304         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305             switch(type){
1306             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1307                 break;
1308             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1309                 break;
1310             }
1311         }
1312     }
1313 }
1314
1315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1316     const int support = type==1 ? 3 : 5;
1317     int level;
1318     if(type==2) return;
1319
1320     for(level=decomposition_count-1; level>=0; level--){
1321         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322             switch(type){
1323             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1324                 break;
1325             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1326                 break;
1327             }
1328         }
1329     }
1330 }
1331
1332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1333         DWTCompose cs[MAX_DECOMPOSITIONS];
1334         int y;
1335         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336         for(y=0; y<height; y+=4)
1337             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1338 }
1339
1340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1341     const int w= b->width;
1342     const int h= b->height;
1343     int x, y;
1344
1345     if(1){
1346         int run=0;
1347         int runs[w*h];
1348         int run_index=0;
1349         int max_index;
1350
1351         for(y=0; y<h; y++){
1352             for(x=0; x<w; x++){
1353                 int v, p=0;
1354                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355                 v= src[x + y*stride];
1356
1357                 if(y){
1358                     t= src[x + (y-1)*stride];
1359                     if(x){
1360                         lt= src[x - 1 + (y-1)*stride];
1361                     }
1362                     if(x + 1 < w){
1363                         rt= src[x + 1 + (y-1)*stride];
1364                     }
1365                 }
1366                 if(x){
1367                     l= src[x - 1 + y*stride];
1368                     /*if(x > 1){
1369                         if(orientation==1) ll= src[y + (x-2)*stride];
1370                         else               ll= src[x - 2 + y*stride];
1371                     }*/
1372                 }
1373                 if(parent){
1374                     int px= x>>1;
1375                     int py= y>>1;
1376                     if(px<b->parent->width && py<b->parent->height)
1377                         p= parent[px + py*2*stride];
1378                 }
1379                 if(!(/*ll|*/l|lt|t|rt|p)){
1380                     if(v){
1381                         runs[run_index++]= run;
1382                         run=0;
1383                     }else{
1384                         run++;
1385                     }
1386                 }
1387             }
1388         }
1389         max_index= run_index;
1390         runs[run_index++]= run;
1391         run_index=0;
1392         run= runs[run_index++];
1393
1394         put_symbol2(&s->c, b->state[30], max_index, 0);
1395         if(run_index <= max_index)
1396             put_symbol2(&s->c, b->state[1], run, 3);
1397
1398         for(y=0; y<h; y++){
1399             if(s->c.bytestream_end - s->c.bytestream < w*40){
1400                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1401                 return -1;
1402             }
1403             for(x=0; x<w; x++){
1404                 int v, p=0;
1405                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1406                 v= src[x + y*stride];
1407
1408                 if(y){
1409                     t= src[x + (y-1)*stride];
1410                     if(x){
1411                         lt= src[x - 1 + (y-1)*stride];
1412                     }
1413                     if(x + 1 < w){
1414                         rt= src[x + 1 + (y-1)*stride];
1415                     }
1416                 }
1417                 if(x){
1418                     l= src[x - 1 + y*stride];
1419                     /*if(x > 1){
1420                         if(orientation==1) ll= src[y + (x-2)*stride];
1421                         else               ll= src[x - 2 + y*stride];
1422                     }*/
1423                 }
1424                 if(parent){
1425                     int px= x>>1;
1426                     int py= y>>1;
1427                     if(px<b->parent->width && py<b->parent->height)
1428                         p= parent[px + py*2*stride];
1429                 }
1430                 if(/*ll|*/l|lt|t|rt|p){
1431                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1432
1433                     put_rac(&s->c, &b->state[0][context], !!v);
1434                 }else{
1435                     if(!run){
1436                         run= runs[run_index++];
1437
1438                         if(run_index <= max_index)
1439                             put_symbol2(&s->c, b->state[1], run, 3);
1440                         assert(v);
1441                     }else{
1442                         run--;
1443                         assert(!v);
1444                     }
1445                 }
1446                 if(v){
1447                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448                     int l2= 2*FFABS(l) + (l<0);
1449                     int t2= 2*FFABS(t) + (t<0);
1450
1451                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1452                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1453                 }
1454             }
1455         }
1456     }
1457     return 0;
1458 }
1459
1460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1461 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
1462 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
1463     return encode_subband_c0run(s, b, src, parent, stride, orientation);
1464 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
1465 }
1466
1467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1468     const int w= b->width;
1469     const int h= b->height;
1470     int x,y;
1471
1472     if(1){
1473         int run, runs;
1474         x_and_coeff *xc= b->x_coeff;
1475         x_and_coeff *prev_xc= NULL;
1476         x_and_coeff *prev2_xc= xc;
1477         x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478         x_and_coeff *prev_parent_xc= parent_xc;
1479
1480         runs= get_symbol2(&s->c, b->state[30], 0);
1481         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482         else           run= INT_MAX;
1483
1484         for(y=0; y<h; y++){
1485             int v=0;
1486             int lt=0, t=0, rt=0;
1487
1488             if(y && prev_xc->x == 0){
1489                 rt= prev_xc->coeff;
1490             }
1491             for(x=0; x<w; x++){
1492                 int p=0;
1493                 const int l= v;
1494
1495                 lt= t; t= rt;
1496
1497                 if(y){
1498                     if(prev_xc->x <= x)
1499                         prev_xc++;
1500                     if(prev_xc->x == x + 1)
1501                         rt= prev_xc->coeff;
1502                     else
1503                         rt=0;
1504                 }
1505                 if(parent_xc){
1506                     if(x>>1 > parent_xc->x){
1507                         parent_xc++;
1508                     }
1509                     if(x>>1 == parent_xc->x){
1510                         p= parent_xc->coeff;
1511                     }
1512                 }
1513                 if(/*ll|*/l|lt|t|rt|p){
1514                     int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1515
1516                     v=get_rac(&s->c, &b->state[0][context]);
1517                     if(v){
1518                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1520
1521                         xc->x=x;
1522                         (xc++)->coeff= v;
1523                     }
1524                 }else{
1525                     if(!run){
1526                         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527                         else           run= INT_MAX;
1528                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1530
1531                         xc->x=x;
1532                         (xc++)->coeff= v;
1533                     }else{
1534                         int max_run;
1535                         run--;
1536                         v=0;
1537
1538                         if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1539                         else  max_run= FFMIN(run, w-x-1);
1540                         if(parent_xc)
1541                             max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1542                         x+= max_run;
1543                         run-= max_run;
1544                     }
1545                 }
1546             }
1547             (xc++)->x= w+1; //end marker
1548             prev_xc= prev2_xc;
1549             prev2_xc= xc;
1550
1551             if(parent_xc){
1552                 if(y&1){
1553                     while(parent_xc->x != parent->width+1)
1554                         parent_xc++;
1555                     parent_xc++;
1556                     prev_parent_xc= parent_xc;
1557                 }else{
1558                     parent_xc= prev_parent_xc;
1559                 }
1560             }
1561         }
1562
1563         (xc++)->x= w+1; //end marker
1564     }
1565 }
1566
1567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568     const int w= b->width;
1569     int y;
1570     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1571     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1572     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573     int new_index = 0;
1574
1575     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1576         qadd= 0;
1577         qmul= 1<<QEXPSHIFT;
1578     }
1579
1580     /* If we are on the second or later slice, restore our index. */
1581     if (start_y != 0)
1582         new_index = save_state[0];
1583
1584
1585     for(y=start_y; y<h; y++){
1586         int x = 0;
1587         int v;
1588         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589         memset(line, 0, b->width*sizeof(IDWTELEM));
1590         v = b->x_coeff[new_index].coeff;
1591         x = b->x_coeff[new_index++].x;
1592         while(x < w){
1593             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594             register int u= -(v&1);
1595             line[x] = (t^u) - u;
1596
1597             v = b->x_coeff[new_index].coeff;
1598             x = b->x_coeff[new_index++].x;
1599         }
1600     }
1601
1602     /* Save our variables for the next slice. */
1603     save_state[0] = new_index;
1604
1605     return;
1606 }
1607
1608 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1609     int plane_index, level, orientation;
1610
1611     for(plane_index=0; plane_index<3; plane_index++){
1612         for(level=0; level<MAX_DECOMPOSITIONS; level++){
1613             for(orientation=level ? 1:0; orientation<4; orientation++){
1614                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615             }
1616         }
1617     }
1618     memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619     memset(s->block_state, MID_STATE, sizeof(s->block_state));
1620 }
1621
1622 static int alloc_blocks(SnowContext *s){
1623     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1625
1626     s->b_width = w;
1627     s->b_height= h;
1628
1629     av_free(s->block);
1630     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1631     return 0;
1632 }
1633
1634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1635     uint8_t *bytestream= d->bytestream;
1636     uint8_t *bytestream_start= d->bytestream_start;
1637     *d= *s;
1638     d->bytestream= bytestream;
1639     d->bytestream_start= bytestream_start;
1640 }
1641
1642 //near copy & paste from dsputil, FIXME
1643 static int pix_sum(uint8_t * pix, int line_size, int w)
1644 {
1645     int s, i, j;
1646
1647     s = 0;
1648     for (i = 0; i < w; i++) {
1649         for (j = 0; j < w; j++) {
1650             s += pix[0];
1651             pix ++;
1652         }
1653         pix += line_size - w;
1654     }
1655     return s;
1656 }
1657
1658 //near copy & paste from dsputil, FIXME
1659 static int pix_norm1(uint8_t * pix, int line_size, int w)
1660 {
1661     int s, i, j;
1662     uint32_t *sq = ff_squareTbl + 256;
1663
1664     s = 0;
1665     for (i = 0; i < w; i++) {
1666         for (j = 0; j < w; j ++) {
1667             s += sq[pix[0]];
1668             pix ++;
1669         }
1670         pix += line_size - w;
1671     }
1672     return s;
1673 }
1674
1675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1676     const int w= s->b_width << s->block_max_depth;
1677     const int rem_depth= s->block_max_depth - level;
1678     const int index= (x + y*w) << rem_depth;
1679     const int block_w= 1<<rem_depth;
1680     BlockNode block;
1681     int i,j;
1682
1683     block.color[0]= l;
1684     block.color[1]= cb;
1685     block.color[2]= cr;
1686     block.mx= mx;
1687     block.my= my;
1688     block.ref= ref;
1689     block.type= type;
1690     block.level= level;
1691
1692     for(j=0; j<block_w; j++){
1693         for(i=0; i<block_w; i++){
1694             s->block[index + i + j*w]= block;
1695         }
1696     }
1697 }
1698
1699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1700     const int offset[3]= {
1701           y*c->  stride + x,
1702         ((y*c->uvstride + x)>>1),
1703         ((y*c->uvstride + x)>>1),
1704     };
1705     int i;
1706     for(i=0; i<3; i++){
1707         c->src[0][i]= src [i];
1708         c->ref[0][i]= ref [i] + offset[i];
1709     }
1710     assert(!ref_index);
1711 }
1712
1713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1714                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1715     if(s->ref_frames == 1){
1716         *mx = mid_pred(left->mx, top->mx, tr->mx);
1717         *my = mid_pred(left->my, top->my, tr->my);
1718     }else{
1719         const int *scale = scale_mv_ref[ref];
1720         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1721                        (top ->mx * scale[top ->ref] + 128) >>8,
1722                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1723         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1724                        (top ->my * scale[top ->ref] + 128) >>8,
1725                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
1726     }
1727 }
1728
1729 //FIXME copy&paste
1730 #define P_LEFT P[1]
1731 #define P_TOP P[2]
1732 #define P_TOPRIGHT P[3]
1733 #define P_MEDIAN P[4]
1734 #define P_MV1 P[9]
1735 #define FLAG_QPEL   1 //must be 1
1736
1737 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1738     uint8_t p_buffer[1024];
1739     uint8_t i_buffer[1024];
1740     uint8_t p_state[sizeof(s->block_state)];
1741     uint8_t i_state[sizeof(s->block_state)];
1742     RangeCoder pc, ic;
1743     uint8_t *pbbak= s->c.bytestream;
1744     uint8_t *pbbak_start= s->c.bytestream_start;
1745     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1746     const int w= s->b_width  << s->block_max_depth;
1747     const int h= s->b_height << s->block_max_depth;
1748     const int rem_depth= s->block_max_depth - level;
1749     const int index= (x + y*w) << rem_depth;
1750     const int block_w= 1<<(LOG2_MB_SIZE - level);
1751     int trx= (x+1)<<rem_depth;
1752     int try= (y+1)<<rem_depth;
1753     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1754     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1755     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1756     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1757     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1758     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1759     int pl = left->color[0];
1760     int pcb= left->color[1];
1761     int pcr= left->color[2];
1762     int pmx, pmy;
1763     int mx=0, my=0;
1764     int l,cr,cb;
1765     const int stride= s->current_picture.linesize[0];
1766     const int uvstride= s->current_picture.linesize[1];
1767     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1768                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1769                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1770     int P[10][2];
1771     int16_t last_mv[3][2];
1772     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1773     const int shift= 1+qpel;
1774     MotionEstContext *c= &s->m.me;
1775     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1776     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1777     int my_context= av_log2(2*FFABS(left->my - top->my));
1778     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1779     int ref, best_ref, ref_score, ref_mx, ref_my;
1780
1781     assert(sizeof(s->block_state) >= 256);
1782     if(s->keyframe){
1783         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1784         return 0;
1785     }
1786
1787 //    clip predictors / edge ?
1788
1789     P_LEFT[0]= left->mx;
1790     P_LEFT[1]= left->my;
1791     P_TOP [0]= top->mx;
1792     P_TOP [1]= top->my;
1793     P_TOPRIGHT[0]= tr->mx;
1794     P_TOPRIGHT[1]= tr->my;
1795
1796     last_mv[0][0]= s->block[index].mx;
1797     last_mv[0][1]= s->block[index].my;
1798     last_mv[1][0]= right->mx;
1799     last_mv[1][1]= right->my;
1800     last_mv[2][0]= bottom->mx;
1801     last_mv[2][1]= bottom->my;
1802
1803     s->m.mb_stride=2;
1804     s->m.mb_x=
1805     s->m.mb_y= 0;
1806     c->skip= 0;
1807
1808     assert(c->  stride ==   stride);
1809     assert(c->uvstride == uvstride);
1810
1811     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1812     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1813     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1814     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1815
1816     c->xmin = - x*block_w - 16+2;
1817     c->ymin = - y*block_w - 16+2;
1818     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1820
1821     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1822     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1823     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1824     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1825     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1826     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1827     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1828
1829     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1830     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831
1832     if (!y) {
1833         c->pred_x= P_LEFT[0];
1834         c->pred_y= P_LEFT[1];
1835     } else {
1836         c->pred_x = P_MEDIAN[0];
1837         c->pred_y = P_MEDIAN[1];
1838     }
1839
1840     score= INT_MAX;
1841     best_ref= 0;
1842     for(ref=0; ref<s->ref_frames; ref++){
1843         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1844
1845         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1846                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1847
1848         assert(ref_mx >= c->xmin);
1849         assert(ref_mx <= c->xmax);
1850         assert(ref_my >= c->ymin);
1851         assert(ref_my <= c->ymax);
1852
1853         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1854         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1855         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1856         if(s->ref_mvs[ref]){
1857             s->ref_mvs[ref][index][0]= ref_mx;
1858             s->ref_mvs[ref][index][1]= ref_my;
1859             s->ref_scores[ref][index]= ref_score;
1860         }
1861         if(score > ref_score){
1862             score= ref_score;
1863             best_ref= ref;
1864             mx= ref_mx;
1865             my= ref_my;
1866         }
1867     }
1868     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1869
1870   //  subpel search
1871     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1872     pc= s->c;
1873     pc.bytestream_start=
1874     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1875     memcpy(p_state, s->block_state, sizeof(s->block_state));
1876
1877     if(level!=s->block_max_depth)
1878         put_rac(&pc, &p_state[4 + s_context], 1);
1879     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1880     if(s->ref_frames > 1)
1881         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1882     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1883     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1884     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1885     p_len= pc.bytestream - pc.bytestream_start;
1886     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1887
1888     block_s= block_w*block_w;
1889     sum = pix_sum(current_data[0], stride, block_w);
1890     l= (sum + block_s/2)/block_s;
1891     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1892
1893     block_s= block_w*block_w>>2;
1894     sum = pix_sum(current_data[1], uvstride, block_w>>1);
1895     cb= (sum + block_s/2)/block_s;
1896 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1897     sum = pix_sum(current_data[2], uvstride, block_w>>1);
1898     cr= (sum + block_s/2)/block_s;
1899 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900
1901     ic= s->c;
1902     ic.bytestream_start=
1903     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1904     memcpy(i_state, s->block_state, sizeof(s->block_state));
1905     if(level!=s->block_max_depth)
1906         put_rac(&ic, &i_state[4 + s_context], 1);
1907     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1908     put_symbol(&ic, &i_state[32],  l-pl , 1);
1909     put_symbol(&ic, &i_state[64], cb-pcb, 1);
1910     put_symbol(&ic, &i_state[96], cr-pcr, 1);
1911     i_len= ic.bytestream - ic.bytestream_start;
1912     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1913
1914 //    assert(score==256*256*256*64-1);
1915     assert(iscore < 255*255*256 + s->lambda2*10);
1916     assert(iscore >= 0);
1917     assert(l>=0 && l<=255);
1918     assert(pl>=0 && pl<=255);
1919
1920     if(level==0){
1921         int varc= iscore >> 8;
1922         int vard= score >> 8;
1923         if (vard <= 64 || vard < varc)
1924             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1925         else
1926             c->scene_change_score+= s->m.qscale;
1927     }
1928
1929     if(level!=s->block_max_depth){
1930         put_rac(&s->c, &s->block_state[4 + s_context], 0);
1931         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1932         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1933         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1934         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1935         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1936
1937         if(score2 < score && score2 < iscore)
1938             return score2;
1939     }
1940
1941     if(iscore < score){
1942         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1943         memcpy(pbbak, i_buffer, i_len);
1944         s->c= ic;
1945         s->c.bytestream_start= pbbak_start;
1946         s->c.bytestream= pbbak + i_len;
1947         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1948         memcpy(s->block_state, i_state, sizeof(s->block_state));
1949         return iscore;
1950     }else{
1951         memcpy(pbbak, p_buffer, p_len);
1952         s->c= pc;
1953         s->c.bytestream_start= pbbak_start;
1954         s->c.bytestream= pbbak + p_len;
1955         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1956         memcpy(s->block_state, p_state, sizeof(s->block_state));
1957         return score;
1958     }
1959 }
1960
1961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1962     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1963         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1964     }else{
1965         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1966     }
1967 }
1968
1969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1970     const int w= s->b_width  << s->block_max_depth;
1971     const int rem_depth= s->block_max_depth - level;
1972     const int index= (x + y*w) << rem_depth;
1973     int trx= (x+1)<<rem_depth;
1974     BlockNode *b= &s->block[index];
1975     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1976     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1977     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1978     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1979     int pl = left->color[0];
1980     int pcb= left->color[1];
1981     int pcr= left->color[2];
1982     int pmx, pmy;
1983     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1984     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1985     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1986     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987
1988     if(s->keyframe){
1989         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1990         return;
1991     }
1992
1993     if(level!=s->block_max_depth){
1994         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1995             put_rac(&s->c, &s->block_state[4 + s_context], 1);
1996         }else{
1997             put_rac(&s->c, &s->block_state[4 + s_context], 0);
1998             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1999             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2000             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2001             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2002             return;
2003         }
2004     }
2005     if(b->type & BLOCK_INTRA){
2006         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2007         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2008         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2009         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2010         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2011         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2012     }else{
2013         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2014         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2015         if(s->ref_frames > 1)
2016             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2017         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2018         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2019         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2020     }
2021 }
2022
2023 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2024     const int w= s->b_width << s->block_max_depth;
2025     const int rem_depth= s->block_max_depth - level;
2026     const int index= (x + y*w) << rem_depth;
2027     int trx= (x+1)<<rem_depth;
2028     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2029     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2030     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2031     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2032     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033
2034     if(s->keyframe){
2035         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2036         return;
2037     }
2038
2039     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2040         int type, mx, my;
2041         int l = left->color[0];
2042         int cb= left->color[1];
2043         int cr= left->color[2];
2044         int ref = 0;
2045         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2046         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2047         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2048
2049         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2050
2051         if(type){
2052             pred_mv(s, &mx, &my, 0, left, top, tr);
2053             l += get_symbol(&s->c, &s->block_state[32], 1);
2054             cb+= get_symbol(&s->c, &s->block_state[64], 1);
2055             cr+= get_symbol(&s->c, &s->block_state[96], 1);
2056         }else{
2057             if(s->ref_frames > 1)
2058                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2059             pred_mv(s, &mx, &my, ref, left, top, tr);
2060             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2061             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2062         }
2063         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2064     }else{
2065         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2066         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2067         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2068         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2069     }
2070 }
2071
2072 static void encode_blocks(SnowContext *s, int search){
2073     int x, y;
2074     int w= s->b_width;
2075     int h= s->b_height;
2076
2077     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2078         iterative_me(s);
2079
2080     for(y=0; y<h; y++){
2081         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2082             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2083             return;
2084         }
2085         for(x=0; x<w; x++){
2086             if(s->avctx->me_method == ME_ITER || !search)
2087                 encode_q_branch2(s, 0, x, y);
2088             else
2089                 encode_q_branch (s, 0, x, y);
2090         }
2091     }
2092 }
2093
2094 static void decode_blocks(SnowContext *s){
2095     int x, y;
2096     int w= s->b_width;
2097     int h= s->b_height;
2098
2099     for(y=0; y<h; y++){
2100         for(x=0; x<w; x++){
2101             decode_q_branch(s, 0, x, y);
2102         }
2103     }
2104 }
2105
2106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2107     static const uint8_t weight[64]={
2108     8,7,6,5,4,3,2,1,
2109     7,7,0,0,0,0,0,1,
2110     6,0,6,0,0,0,2,0,
2111     5,0,0,5,0,3,0,0,
2112     4,0,0,0,4,0,0,0,
2113     3,0,0,5,0,3,0,0,
2114     2,0,6,0,0,0,2,0,
2115     1,7,0,0,0,0,0,1,
2116     };
2117
2118     static const uint8_t brane[256]={
2119     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2120     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2121     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2122     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2123     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2124     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2125     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2126     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2127     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2128     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2129     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2130     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2131     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2132     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2133     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2134     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2135     };
2136
2137     static const uint8_t needs[16]={
2138     0,1,0,0,
2139     2,4,2,0,
2140     0,1,0,0,
2141     15
2142     };
2143
2144     int x, y, b, r, l;
2145     int16_t tmpIt   [64*(32+HTAPS_MAX)];
2146     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2147     int16_t *tmpI= tmpIt;
2148     uint8_t *tmp2= tmp2t[0];
2149     const uint8_t *hpel[11];
2150     assert(dx<16 && dy<16);
2151     r= brane[dx + 16*dy]&15;
2152     l= brane[dx + 16*dy]>>4;
2153
2154     b= needs[l] | needs[r];
2155     if(p && !p->diag_mc)
2156         b= 15;
2157
2158     if(b&5){
2159         for(y=0; y < b_h+HTAPS_MAX-1; y++){
2160             for(x=0; x < b_w; x++){
2161                 int a_1=src[x + HTAPS_MAX/2-4];
2162                 int a0= src[x + HTAPS_MAX/2-3];
2163                 int a1= src[x + HTAPS_MAX/2-2];
2164                 int a2= src[x + HTAPS_MAX/2-1];
2165                 int a3= src[x + HTAPS_MAX/2+0];
2166                 int a4= src[x + HTAPS_MAX/2+1];
2167                 int a5= src[x + HTAPS_MAX/2+2];
2168                 int a6= src[x + HTAPS_MAX/2+3];
2169                 int am=0;
2170                 if(!p || p->fast_mc){
2171                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2172                     tmpI[x]= am;
2173                     am= (am+16)>>5;
2174                 }else{
2175                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2176                     tmpI[x]= am;
2177                     am= (am+32)>>6;
2178                 }
2179
2180                 if(am&(~255)) am= ~(am>>31);
2181                 tmp2[x]= am;
2182             }
2183             tmpI+= 64;
2184             tmp2+= stride;
2185             src += stride;
2186         }
2187         src -= stride*y;
2188     }
2189     src += HTAPS_MAX/2 - 1;
2190     tmp2= tmp2t[1];
2191
2192     if(b&2){
2193         for(y=0; y < b_h; y++){
2194             for(x=0; x < b_w+1; x++){
2195                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2196                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2197                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2198                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2199                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2200                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2201                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2202                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2203                 int am=0;
2204                 if(!p || p->fast_mc)
2205                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2206                 else
2207                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2208
2209                 if(am&(~255)) am= ~(am>>31);
2210                 tmp2[x]= am;
2211             }
2212             src += stride;
2213             tmp2+= stride;
2214         }
2215         src -= stride*y;
2216     }
2217     src += stride*(HTAPS_MAX/2 - 1);
2218     tmp2= tmp2t[2];
2219     tmpI= tmpIt;
2220     if(b&4){
2221         for(y=0; y < b_h; y++){
2222             for(x=0; x < b_w; x++){
2223                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2224                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2225                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2226                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2227                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2228                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2229                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2230                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2231                 int am=0;
2232                 if(!p || p->fast_mc)
2233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2234                 else
2235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2236                 if(am&(~255)) am= ~(am>>31);
2237                 tmp2[x]= am;
2238             }
2239             tmpI+= 64;
2240             tmp2+= stride;
2241         }
2242     }
2243
2244     hpel[ 0]= src;
2245     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2246     hpel[ 2]= src + 1;
2247
2248     hpel[ 4]= tmp2t[1];
2249     hpel[ 5]= tmp2t[2];
2250     hpel[ 6]= tmp2t[1] + 1;
2251
2252     hpel[ 8]= src + stride;
2253     hpel[ 9]= hpel[1] + stride;
2254     hpel[10]= hpel[8] + 1;
2255
2256     if(b==15){
2257         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2258         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2259         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2260         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2261         dx&=7;
2262         dy&=7;
2263         for(y=0; y < b_h; y++){
2264             for(x=0; x < b_w; x++){
2265                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2266                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2267             }
2268             src1+=stride;
2269             src2+=stride;
2270             src3+=stride;
2271             src4+=stride;
2272             dst +=stride;
2273         }
2274     }else{
2275         const uint8_t *src1= hpel[l];
2276         const uint8_t *src2= hpel[r];
2277         int a= weight[((dx&7) + (8*(dy&7)))];
2278         int b= 8-a;
2279         for(y=0; y < b_h; y++){
2280             for(x=0; x < b_w; x++){
2281                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2282             }
2283             src1+=stride;
2284             src2+=stride;
2285             dst +=stride;
2286         }
2287     }
2288 }
2289
2290 #define mca(dx,dy,b_w)\
2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2292     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2293     assert(h==b_w);\
2294     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2295 }
2296
2297 mca( 0, 0,16)
2298 mca( 8, 0,16)
2299 mca( 0, 8,16)
2300 mca( 8, 8,16)
2301 mca( 0, 0,8)
2302 mca( 8, 0,8)
2303 mca( 0, 8,8)
2304 mca( 8, 8,8)
2305
2306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2307     if(block->type & BLOCK_INTRA){
2308         int x, y;
2309         const int color = block->color[plane_index];
2310         const int color4= color*0x01010101;
2311         if(b_w==32){
2312             for(y=0; y < b_h; y++){
2313                 *(uint32_t*)&dst[0 + y*stride]= color4;
2314                 *(uint32_t*)&dst[4 + y*stride]= color4;
2315                 *(uint32_t*)&dst[8 + y*stride]= color4;
2316                 *(uint32_t*)&dst[12+ y*stride]= color4;
2317                 *(uint32_t*)&dst[16+ y*stride]= color4;
2318                 *(uint32_t*)&dst[20+ y*stride]= color4;
2319                 *(uint32_t*)&dst[24+ y*stride]= color4;
2320                 *(uint32_t*)&dst[28+ y*stride]= color4;
2321             }
2322         }else if(b_w==16){
2323             for(y=0; y < b_h; y++){
2324                 *(uint32_t*)&dst[0 + y*stride]= color4;
2325                 *(uint32_t*)&dst[4 + y*stride]= color4;
2326                 *(uint32_t*)&dst[8 + y*stride]= color4;
2327                 *(uint32_t*)&dst[12+ y*stride]= color4;
2328             }
2329         }else if(b_w==8){
2330             for(y=0; y < b_h; y++){
2331                 *(uint32_t*)&dst[0 + y*stride]= color4;
2332                 *(uint32_t*)&dst[4 + y*stride]= color4;
2333             }
2334         }else if(b_w==4){
2335             for(y=0; y < b_h; y++){
2336                 *(uint32_t*)&dst[0 + y*stride]= color4;
2337             }
2338         }else{
2339             for(y=0; y < b_h; y++){
2340                 for(x=0; x < b_w; x++){
2341                     dst[x + y*stride]= color;
2342                 }
2343             }
2344         }
2345     }else{
2346         uint8_t *src= s->last_picture[block->ref].data[plane_index];
2347         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2348         int mx= block->mx*scale;
2349         int my= block->my*scale;
2350         const int dx= mx&15;
2351         const int dy= my&15;
2352         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2353         sx += (mx>>4) - (HTAPS_MAX/2-1);
2354         sy += (my>>4) - (HTAPS_MAX/2-1);
2355         src += sx + sy*stride;
2356         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2357            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2358             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2359             src= tmp + MB_SIZE;
2360         }
2361 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2362 //        assert(!(b_w&(b_w-1)));
2363         assert(b_w>1 && b_h>1);
2364         assert((tab_index>=0 && tab_index<4) || b_w==32);
2365         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2366             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2367         else if(b_w==32){
2368             int y;
2369             for(y=0; y<b_h; y+=16){
2370                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2371                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2372             }
2373         }else if(b_w==b_h)
2374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2375         else if(b_w==2*b_h){
2376             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2377             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2378         }else{
2379             assert(2*b_w==b_h);
2380             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2381             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2382         }
2383     }
2384 }
2385
2386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2387                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2388     int y, x;
2389     IDWTELEM * dst;
2390     for(y=0; y<b_h; y++){
2391         //FIXME ugly misuse of obmc_stride
2392         const uint8_t *obmc1= obmc + y*obmc_stride;
2393         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2394         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2395         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2396         dst = slice_buffer_get_line(sb, src_y + y);
2397         for(x=0; x<b_w; x++){
2398             int v=   obmc1[x] * block[3][x + y*src_stride]
2399                     +obmc2[x] * block[2][x + y*src_stride]
2400                     +obmc3[x] * block[1][x + y*src_stride]
2401                     +obmc4[x] * block[0][x + y*src_stride];
2402
2403             v <<= 8 - LOG2_OBMC_MAX;
2404             if(FRAC_BITS != 8){
2405                 v >>= 8 - FRAC_BITS;
2406             }
2407             if(add){
2408                 v += dst[x + src_x];
2409                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2410                 if(v&(~255)) v= ~(v>>31);
2411                 dst8[x + y*src_stride] = v;
2412             }else{
2413                 dst[x + src_x] -= v;
2414             }
2415         }
2416     }
2417 }
2418
2419 //FIXME name cleanup (b_w, block_w, b_width stuff)
2420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2421     const int b_width = s->b_width  << s->block_max_depth;
2422     const int b_height= s->b_height << s->block_max_depth;
2423     const int b_stride= b_width;
2424     BlockNode *lt= &s->block[b_x + b_y*b_stride];
2425     BlockNode *rt= lt+1;
2426     BlockNode *lb= lt+b_stride;
2427     BlockNode *rb= lb+1;
2428     uint8_t *block[4];
2429     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2430     uint8_t *tmp = s->scratchbuf;
2431     uint8_t *ptmp;
2432     int x,y;
2433
2434     if(b_x<0){
2435         lt= rt;
2436         lb= rb;
2437     }else if(b_x + 1 >= b_width){
2438         rt= lt;
2439         rb= lb;
2440     }
2441     if(b_y<0){
2442         lt= lb;
2443         rt= rb;
2444     }else if(b_y + 1 >= b_height){
2445         lb= lt;
2446         rb= rt;
2447     }
2448
2449     if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2450         obmc -= src_x;
2451         b_w += src_x;
2452         if(!sliced && !offset_dst)
2453             dst -= src_x;
2454         src_x=0;
2455     }else if(src_x + b_w > w){
2456         b_w = w - src_x;
2457     }
2458     if(src_y<0){
2459         obmc -= src_y*obmc_stride;
2460         b_h += src_y;
2461         if(!sliced && !offset_dst)
2462             dst -= src_y*dst_stride;
2463         src_y=0;
2464     }else if(src_y + b_h> h){
2465         b_h = h - src_y;
2466     }
2467
2468     if(b_w<=0 || b_h<=0) return;
2469
2470     assert(src_stride > 2*MB_SIZE + 5);
2471
2472     if(!sliced && offset_dst)
2473         dst += src_x + src_y*dst_stride;
2474     dst8+= src_x + src_y*src_stride;
2475 //    src += src_x + src_y*src_stride;
2476
2477     ptmp= tmp + 3*tmp_step;
2478     block[0]= ptmp;
2479     ptmp+=tmp_step;
2480     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2481
2482     if(same_block(lt, rt)){
2483         block[1]= block[0];
2484     }else{
2485         block[1]= ptmp;
2486         ptmp+=tmp_step;
2487         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2488     }
2489
2490     if(same_block(lt, lb)){
2491         block[2]= block[0];
2492     }else if(same_block(rt, lb)){
2493         block[2]= block[1];
2494     }else{
2495         block[2]= ptmp;
2496         ptmp+=tmp_step;
2497         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2498     }
2499
2500     if(same_block(lt, rb) ){
2501         block[3]= block[0];
2502     }else if(same_block(rt, rb)){
2503         block[3]= block[1];
2504     }else if(same_block(lb, rb)){
2505         block[3]= block[2];
2506     }else{
2507         block[3]= ptmp;
2508         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2509     }
2510 #if 0
2511     for(y=0; y<b_h; y++){
2512         for(x=0; x<b_w; x++){
2513             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2514             if(add) dst[x + y*dst_stride] += v;
2515             else    dst[x + y*dst_stride] -= v;
2516         }
2517     }
2518     for(y=0; y<b_h; y++){
2519         uint8_t *obmc2= obmc + (obmc_stride>>1);
2520         for(x=0; x<b_w; x++){
2521             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2522             if(add) dst[x + y*dst_stride] += v;
2523             else    dst[x + y*dst_stride] -= v;
2524         }
2525     }
2526     for(y=0; y<b_h; y++){
2527         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2528         for(x=0; x<b_w; x++){
2529             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2530             if(add) dst[x + y*dst_stride] += v;
2531             else    dst[x + y*dst_stride] -= v;
2532         }
2533     }
2534     for(y=0; y<b_h; y++){
2535         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2536         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2537         for(x=0; x<b_w; x++){
2538             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2539             if(add) dst[x + y*dst_stride] += v;
2540             else    dst[x + y*dst_stride] -= v;
2541         }
2542     }
2543 #else
2544     if(sliced){
2545         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2546     }else{
2547         for(y=0; y<b_h; y++){
2548             //FIXME ugly misuse of obmc_stride
2549             const uint8_t *obmc1= obmc + y*obmc_stride;
2550             const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2551             const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2552             const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2553             for(x=0; x<b_w; x++){
2554                 int v=   obmc1[x] * block[3][x + y*src_stride]
2555                         +obmc2[x] * block[2][x + y*src_stride]
2556                         +obmc3[x] * block[1][x + y*src_stride]
2557                         +obmc4[x] * block[0][x + y*src_stride];
2558
2559                 v <<= 8 - LOG2_OBMC_MAX;
2560                 if(FRAC_BITS != 8){
2561                     v >>= 8 - FRAC_BITS;
2562                 }
2563                 if(add){
2564                     v += dst[x + y*dst_stride];
2565                     v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2566                     if(v&(~255)) v= ~(v>>31);
2567                     dst8[x + y*src_stride] = v;
2568                 }else{
2569                     dst[x + y*dst_stride] -= v;
2570                 }
2571             }
2572         }
2573     }
2574 #endif /* 0 */
2575 }
2576
2577 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2578     Plane *p= &s->plane[plane_index];
2579     const int mb_w= s->b_width  << s->block_max_depth;
2580     const int mb_h= s->b_height << s->block_max_depth;
2581     int x, y, mb_x;
2582     int block_size = MB_SIZE >> s->block_max_depth;
2583     int block_w    = plane_index ? block_size/2 : block_size;
2584     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2585     int obmc_stride= plane_index ? block_size : 2*block_size;
2586     int ref_stride= s->current_picture.linesize[plane_index];
2587     uint8_t *dst8= s->current_picture.data[plane_index];
2588     int w= p->width;
2589     int h= p->height;
2590
2591     if(s->keyframe || (s->avctx->debug&512)){
2592         if(mb_y==mb_h)
2593             return;
2594
2595         if(add){
2596             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2597 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2598                 IDWTELEM * line = sb->line[y];
2599                 for(x=0; x<w; x++){
2600 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2602                     v >>= FRAC_BITS;
2603                     if(v&(~255)) v= ~(v>>31);
2604                     dst8[x + y*ref_stride]= v;
2605                 }
2606             }
2607         }else{
2608             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2609 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2610                 IDWTELEM * line = sb->line[y];
2611                 for(x=0; x<w; x++){
2612                     line[x] -= 128 << FRAC_BITS;
2613 //                    buf[x + y*w]-= 128<<FRAC_BITS;
2614                 }
2615             }
2616         }
2617
2618         return;
2619     }
2620
2621     for(mb_x=0; mb_x<=mb_w; mb_x++){
2622         add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2623                    block_w*mb_x - block_w/2,
2624                    block_w*mb_y - block_w/2,
2625                    block_w, block_w,
2626                    w, h,
2627                    w, ref_stride, obmc_stride,
2628                    mb_x - 1, mb_y - 1,
2629                    add, 0, plane_index);
2630     }
2631 }
2632
2633 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2634     Plane *p= &s->plane[plane_index];
2635     const int mb_w= s->b_width  << s->block_max_depth;
2636     const int mb_h= s->b_height << s->block_max_depth;
2637     int x, y, mb_x;
2638     int block_size = MB_SIZE >> s->block_max_depth;
2639     int block_w    = plane_index ? block_size/2 : block_size;
2640     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2641     const int obmc_stride= plane_index ? block_size : 2*block_size;
2642     int ref_stride= s->current_picture.linesize[plane_index];
2643     uint8_t *dst8= s->current_picture.data[plane_index];
2644     int w= p->width;
2645     int h= p->height;
2646
2647     if(s->keyframe || (s->avctx->debug&512)){
2648         if(mb_y==mb_h)
2649             return;
2650
2651         if(add){
2652             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2653                 for(x=0; x<w; x++){
2654                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655                     v >>= FRAC_BITS;
2656                     if(v&(~255)) v= ~(v>>31);
2657                     dst8[x + y*ref_stride]= v;
2658                 }
2659             }
2660         }else{
2661             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2662                 for(x=0; x<w; x++){
2663                     buf[x + y*w]-= 128<<FRAC_BITS;
2664                 }
2665             }
2666         }
2667
2668         return;
2669     }
2670
2671     for(mb_x=0; mb_x<=mb_w; mb_x++){
2672         add_yblock(s, 0, NULL, buf, dst8, obmc,
2673                    block_w*mb_x - block_w/2,
2674                    block_w*mb_y - block_w/2,
2675                    block_w, block_w,
2676                    w, h,
2677                    w, ref_stride, obmc_stride,
2678                    mb_x - 1, mb_y - 1,
2679                    add, 1, plane_index);
2680     }
2681 }
2682
2683 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2684     const int mb_h= s->b_height << s->block_max_depth;
2685     int mb_y;
2686     for(mb_y=0; mb_y<=mb_h; mb_y++)
2687         predict_slice(s, buf, plane_index, add, mb_y);
2688 }
2689
2690 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2691     int i, x2, y2;
2692     Plane *p= &s->plane[plane_index];
2693     const int block_size = MB_SIZE >> s->block_max_depth;
2694     const int block_w    = plane_index ? block_size/2 : block_size;
2695     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2696     const int obmc_stride= plane_index ? block_size : 2*block_size;
2697     const int ref_stride= s->current_picture.linesize[plane_index];
2698     uint8_t *src= s-> input_picture.data[plane_index];
2699     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2700     const int b_stride = s->b_width << s->block_max_depth;
2701     const int w= p->width;
2702     const int h= p->height;
2703     int index= mb_x + mb_y*b_stride;
2704     BlockNode *b= &s->block[index];
2705     BlockNode backup= *b;
2706     int ab=0;
2707     int aa=0;
2708
2709     b->type|= BLOCK_INTRA;
2710     b->color[plane_index]= 0;
2711     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2712
2713     for(i=0; i<4; i++){
2714         int mb_x2= mb_x + (i &1) - 1;
2715         int mb_y2= mb_y + (i>>1) - 1;
2716         int x= block_w*mb_x2 + block_w/2;
2717         int y= block_w*mb_y2 + block_w/2;
2718
2719         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2720                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2721
2722         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2723             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2724                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2725                 int obmc_v= obmc[index];
2726                 int d;
2727                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2728                 if(x<0) obmc_v += obmc[index + block_w];
2729                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2730                 if(x+block_w>w) obmc_v += obmc[index - block_w];
2731                 //FIXME precalculate this or simplify it somehow else
2732
2733                 d = -dst[index] + (1<<(FRAC_BITS-1));
2734                 dst[index] = d;
2735                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2736                 aa += obmc_v * obmc_v; //FIXME precalculate this
2737             }
2738         }
2739     }
2740     *b= backup;
2741
2742     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2743 }
2744
2745 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2746     const int b_stride = s->b_width << s->block_max_depth;
2747     const int b_height = s->b_height<< s->block_max_depth;
2748     int index= x + y*b_stride;
2749     const BlockNode *b     = &s->block[index];
2750     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2751     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2752     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2753     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2754     int dmx, dmy;
2755 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2756 //  int my_context= av_log2(2*FFABS(left->my - top->my));
2757
2758     if(x<0 || x>=b_stride || y>=b_height)
2759         return 0;
2760 /*
2761 1            0      0
2762 01X          1-2    1
2763 001XX        3-6    2-3
2764 0001XXX      7-14   4-7
2765 00001XXXX   15-30   8-15
2766 */
2767 //FIXME try accurate rate
2768 //FIXME intra and inter predictors if surrounding blocks are not the same type
2769     if(b->type & BLOCK_INTRA){
2770         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2771                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
2772                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
2773     }else{
2774         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2775         dmx-= b->mx;
2776         dmy-= b->my;
2777         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2778                     + av_log2(2*FFABS(dmy))
2779                     + av_log2(2*b->ref));
2780     }
2781 }
2782
2783 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2784     Plane *p= &s->plane[plane_index];
2785     const int block_size = MB_SIZE >> s->block_max_depth;
2786     const int block_w    = plane_index ? block_size/2 : block_size;
2787     const int obmc_stride= plane_index ? block_size : 2*block_size;
2788     const int ref_stride= s->current_picture.linesize[plane_index];
2789     uint8_t *dst= s->current_picture.data[plane_index];
2790     uint8_t *src= s->  input_picture.data[plane_index];
2791     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2792     uint8_t *cur = s->scratchbuf;
2793     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2794     const int b_stride = s->b_width << s->block_max_depth;
2795     const int b_height = s->b_height<< s->block_max_depth;
2796     const int w= p->width;
2797     const int h= p->height;
2798     int distortion;
2799     int rate= 0;
2800     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2801     int sx= block_w*mb_x - block_w/2;
2802     int sy= block_w*mb_y - block_w/2;
2803     int x0= FFMAX(0,-sx);
2804     int y0= FFMAX(0,-sy);
2805     int x1= FFMIN(block_w*2, w-sx);
2806     int y1= FFMIN(block_w*2, h-sy);
2807     int i,x,y;
2808
2809     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2810
2811     for(y=y0; y<y1; y++){
2812         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2813         const IDWTELEM *pred1 = pred + y*obmc_stride;
2814         uint8_t *cur1 = cur + y*ref_stride;
2815         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2816         for(x=x0; x<x1; x++){
2817 #if FRAC_BITS >= LOG2_OBMC_MAX
2818             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2819 #else
2820             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2821 #endif
2822             v = (v + pred1[x]) >> FRAC_BITS;
2823             if(v&(~255)) v= ~(v>>31);
2824             dst1[x] = v;
2825         }
2826     }
2827
2828     /* copy the regions where obmc[] = (uint8_t)256 */
2829     if(LOG2_OBMC_MAX == 8
2830         && (mb_x == 0 || mb_x == b_stride-1)
2831         && (mb_y == 0 || mb_y == b_height-1)){
2832         if(mb_x == 0)
2833             x1 = block_w;
2834         else
2835             x0 = block_w;
2836         if(mb_y == 0)
2837             y1 = block_w;
2838         else
2839             y0 = block_w;
2840         for(y=y0; y<y1; y++)
2841             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2842     }
2843
2844     if(block_w==16){
2845         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2846         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2847         /* FIXME cmps overlap but do not cover the wavelet's whole support.
2848          * So improving the score of one block is not strictly guaranteed
2849          * to improve the score of the whole frame, thus iterative motion
2850          * estimation does not always converge. */
2851         if(s->avctx->me_cmp == FF_CMP_W97)
2852             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2853         else if(s->avctx->me_cmp == FF_CMP_W53)
2854             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2855         else{
2856             distortion = 0;
2857             for(i=0; i<4; i++){
2858                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2859                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2860             }
2861         }
2862     }else{
2863         assert(block_w==8);
2864         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2865     }
2866
2867     if(plane_index==0){
2868         for(i=0; i<4; i++){
2869 /* ..RRr
2870  * .RXx.
2871  * rxx..
2872  */
2873             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2874         }
2875         if(mb_x == b_stride-2)
2876             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2877     }
2878     return distortion + rate*penalty_factor;
2879 }
2880
2881 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2882     int i, y2;
2883     Plane *p= &s->plane[plane_index];
2884     const int block_size = MB_SIZE >> s->block_max_depth;
2885     const int block_w    = plane_index ? block_size/2 : block_size;
2886     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2887     const int obmc_stride= plane_index ? block_size : 2*block_size;
2888     const int ref_stride= s->current_picture.linesize[plane_index];
2889     uint8_t *dst= s->current_picture.data[plane_index];
2890     uint8_t *src= s-> input_picture.data[plane_index];
2891     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2892     // const has only been removed from zero_dst to suppress a warning
2893     static IDWTELEM zero_dst[4096]; //FIXME
2894     const int b_stride = s->b_width << s->block_max_depth;
2895     const int w= p->width;
2896     const int h= p->height;
2897     int distortion= 0;
2898     int rate= 0;
2899     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900
2901     for(i=0; i<9; i++){
2902         int mb_x2= mb_x + (i%3) - 1;
2903         int mb_y2= mb_y + (i/3) - 1;
2904         int x= block_w*mb_x2 + block_w/2;
2905         int y= block_w*mb_y2 + block_w/2;
2906
2907         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2908                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2909
2910         //FIXME find a cleaner/simpler way to skip the outside stuff
2911         for(y2= y; y2<0; y2++)
2912             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2913         for(y2= h; y2<y+block_w; y2++)
2914             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2915         if(x<0){
2916             for(y2= y; y2<y+block_w; y2++)
2917                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2918         }
2919         if(x+block_w > w){
2920             for(y2= y; y2<y+block_w; y2++)
2921                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2922         }
2923
2924         assert(block_w== 8 || block_w==16);
2925         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2926     }
2927
2928     if(plane_index==0){
2929         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2930         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2931
2932 /* ..RRRr
2933  * .RXXx.
2934  * .RXXx.
2935  * rxxx.
2936  */
2937         if(merged)
2938             rate = get_block_bits(s, mb_x, mb_y, 2);
2939         for(i=merged?4:0; i<9; i++){
2940             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2941             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2942         }
2943     }
2944     return distortion + rate*penalty_factor;
2945 }
2946
2947 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2948     const int b_stride= s->b_width << s->block_max_depth;
2949     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2950     BlockNode backup= *block;
2951     int rd, index, value;
2952
2953     assert(mb_x>=0 && mb_y>=0);
2954     assert(mb_x<b_stride);
2955
2956     if(intra){
2957         block->color[0] = p[0];
2958         block->color[1] = p[1];
2959         block->color[2] = p[2];
2960         block->type |= BLOCK_INTRA;
2961     }else{
2962         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2963         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2964         if(s->me_cache[index] == value)
2965             return 0;
2966         s->me_cache[index]= value;
2967
2968         block->mx= p[0];
2969         block->my= p[1];
2970         block->type &= ~BLOCK_INTRA;
2971     }
2972
2973     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2974
2975 //FIXME chroma
2976     if(rd < *best_rd){
2977         *best_rd= rd;
2978         return 1;
2979     }else{
2980         *block= backup;
2981         return 0;
2982     }
2983 }
2984
2985 /* special case for int[2] args we discard afterwards,
2986  * fixes compilation problem with gcc 2.95 */
2987 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2988     int p[2] = {p0, p1};
2989     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2990 }
2991
2992 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2993     const int b_stride= s->b_width << s->block_max_depth;
2994     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2995     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2996     int rd, index, value;
2997
2998     assert(mb_x>=0 && mb_y>=0);
2999     assert(mb_x<b_stride);
3000     assert(((mb_x|mb_y)&1) == 0);
3001
3002     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3003     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3004     if(s->me_cache[index] == value)
3005         return 0;
3006     s->me_cache[index]= value;
3007
3008     block->mx= p0;
3009     block->my= p1;
3010     block->ref= ref;
3011     block->type &= ~BLOCK_INTRA;
3012     block[1]= block[b_stride]= block[b_stride+1]= *block;
3013
3014     rd= get_4block_rd(s, mb_x, mb_y, 0);
3015
3016 //FIXME chroma
3017     if(rd < *best_rd){
3018         *best_rd= rd;
3019         return 1;
3020     }else{
3021         block[0]= backup[0];
3022         block[1]= backup[1];
3023         block[b_stride]= backup[2];
3024         block[b_stride+1]= backup[3];
3025         return 0;
3026     }
3027 }
3028
3029 static void iterative_me(SnowContext *s){
3030     int pass, mb_x, mb_y;
3031     const int b_width = s->b_width  << s->block_max_depth;
3032     const int b_height= s->b_height << s->block_max_depth;
3033     const int b_stride= b_width;
3034     int color[3];
3035
3036     {
3037         RangeCoder r = s->c;
3038         uint8_t state[sizeof(s->block_state)];
3039         memcpy(state, s->block_state, sizeof(s->block_state));
3040         for(mb_y= 0; mb_y<s->b_height; mb_y++)
3041             for(mb_x= 0; mb_x<s->b_width; mb_x++)
3042                 encode_q_branch(s, 0, mb_x, mb_y);
3043         s->c = r;
3044         memcpy(s->block_state, state, sizeof(s->block_state));
3045     }
3046
3047     for(pass=0; pass<25; pass++){
3048         int change= 0;
3049
3050         for(mb_y= 0; mb_y<b_height; mb_y++){
3051             for(mb_x= 0; mb_x<b_width; mb_x++){
3052                 int dia_change, i, j, ref;
3053                 int best_rd= INT_MAX, ref_rd;
3054                 BlockNode backup, ref_b;
3055                 const int index= mb_x + mb_y * b_stride;
3056                 BlockNode *block= &s->block[index];
3057                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3058                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3059                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3060                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3061                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3062                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3063                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3064                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3065                 const int b_w= (MB_SIZE >> s->block_max_depth);
3066                 uint8_t obmc_edged[b_w*2][b_w*2];
3067
3068                 if(pass && (block->type & BLOCK_OPT))
3069                     continue;
3070                 block->type |= BLOCK_OPT;
3071
3072                 backup= *block;
3073
3074                 if(!s->me_cache_generation)
3075                     memset(s->me_cache, 0, sizeof(s->me_cache));
3076                 s->me_cache_generation += 1<<22;
3077
3078                 //FIXME precalculate
3079                 {
3080                     int x, y;
3081                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3082                     if(mb_x==0)
3083                         for(y=0; y<b_w*2; y++)
3084                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3085                     if(mb_x==b_stride-1)
3086                         for(y=0; y<b_w*2; y++)
3087                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3088                     if(mb_y==0){
3089                         for(x=0; x<b_w*2; x++)
3090                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
3091                         for(y=1; y<b_w; y++)
3092                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3093                     }
3094                     if(mb_y==b_height-1){
3095                         for(x=0; x<b_w*2; x++)
3096                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3097                         for(y=b_w; y<b_w*2-1; y++)
3098                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3099                     }
3100                 }
3101
3102                 //skip stuff outside the picture
3103                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3104                     uint8_t *src= s->  input_picture.data[0];
3105                     uint8_t *dst= s->current_picture.data[0];
3106                     const int stride= s->current_picture.linesize[0];
3107                     const int block_w= MB_SIZE >> s->block_max_depth;
3108                     const int sx= block_w*mb_x - block_w/2;
3109                     const int sy= block_w*mb_y - block_w/2;
3110                     const int w= s->plane[0].width;
3111                     const int h= s->plane[0].height;
3112                     int y;
3113
3114                     for(y=sy; y<0; y++)
3115                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3116                     for(y=h; y<sy+block_w*2; y++)
3117                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3118                     if(sx<0){
3119                         for(y=sy; y<sy+block_w*2; y++)
3120                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3121                     }
3122                     if(sx+block_w*2 > w){
3123                         for(y=sy; y<sy+block_w*2; y++)
3124                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3125                     }
3126                 }
3127
3128                 // intra(black) = neighbors' contribution to the current block
3129                 for(i=0; i<3; i++)
3130                     color[i]= get_dc(s, mb_x, mb_y, i);
3131
3132                 // get previous score (cannot be cached due to OBMC)
3133                 if(pass > 0 && (block->type&BLOCK_INTRA)){
3134                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
3135                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3136                 }else
3137                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3138
3139                 ref_b= *block;
3140                 ref_rd= best_rd;
3141                 for(ref=0; ref < s->ref_frames; ref++){
3142                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3143                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3144                         continue;
3145                     block->ref= ref;
3146                     best_rd= INT_MAX;
3147
3148                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3149                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3150                     if(tb)
3151                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3152                     if(lb)
3153                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3154                     if(rb)
3155                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3156                     if(bb)
3157                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158
3159                     /* fullpel ME */
3160                     //FIXME avoid subpel interpolation / round to nearest integer
3161                     do{
3162                         dia_change=0;
3163                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3164                             for(j=0; j<i; j++){
3165                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3166                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3168                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3169                             }
3170                         }
3171                     }while(dia_change);
3172                     /* subpel ME */
3173                     do{
3174                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3175                         dia_change=0;
3176                         for(i=0; i<8; i++)
3177                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3178                     }while(dia_change);
3179                     //FIXME or try the standard 2 pass qpel or similar
3180
3181                     mvr[0][0]= block->mx;
3182                     mvr[0][1]= block->my;
3183                     if(ref_rd > best_rd){
3184                         ref_rd= best_rd;
3185                         ref_b= *block;
3186                     }
3187                 }
3188                 best_rd= ref_rd;
3189                 *block= ref_b;
3190 #if 1
3191                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3192                 //FIXME RD style color selection
3193 #endif
3194                 if(!same_block(block, &backup)){
3195                     if(tb ) tb ->type &= ~BLOCK_OPT;
3196                     if(lb ) lb ->type &= ~BLOCK_OPT;
3197                     if(rb ) rb ->type &= ~BLOCK_OPT;
3198                     if(bb ) bb ->type &= ~BLOCK_OPT;
3199                     if(tlb) tlb->type &= ~BLOCK_OPT;
3200                     if(trb) trb->type &= ~BLOCK_OPT;
3201                     if(blb) blb->type &= ~BLOCK_OPT;
3202                     if(brb) brb->type &= ~BLOCK_OPT;
3203                     change ++;
3204                 }
3205             }
3206         }
3207         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3208         if(!change)
3209             break;
3210     }
3211
3212     if(s->block_max_depth == 1){
3213         int change= 0;
3214         for(mb_y= 0; mb_y<b_height; mb_y+=2){
3215             for(mb_x= 0; mb_x<b_width; mb_x+=2){
3216                 int i;
3217                 int best_rd, init_rd;
3218                 const int index= mb_x + mb_y * b_stride;
3219                 BlockNode *b[4];
3220
3221                 b[0]= &s->block[index];
3222                 b[1]= b[0]+1;
3223                 b[2]= b[0]+b_stride;
3224                 b[3]= b[2]+1;
3225                 if(same_block(b[0], b[1]) &&
3226                    same_block(b[0], b[2]) &&
3227                    same_block(b[0], b[3]))
3228                     continue;
3229
3230                 if(!s->me_cache_generation)
3231                     memset(s->me_cache, 0, sizeof(s->me_cache));
3232                 s->me_cache_generation += 1<<22;
3233
3234                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3235
3236                 //FIXME more multiref search?
3237                 check_4block_inter(s, mb_x, mb_y,
3238                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3239                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3240
3241                 for(i=0; i<4; i++)
3242                     if(!(b[i]->type&BLOCK_INTRA))
3243                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3244
3245                 if(init_rd != best_rd)
3246                     change++;
3247             }
3248         }
3249         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3250     }
3251 }
3252
3253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3254     const int w= b->width;
3255     const int h= b->height;
3256     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3257     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3258     int x,y, thres1, thres2;
3259
3260     if(s->qlog == LOSSLESS_QLOG){
3261         for(y=0; y<h; y++)
3262             for(x=0; x<w; x++)
3263                 dst[x + y*stride]= src[x + y*stride];
3264         return;
3265     }
3266
3267     bias= bias ? 0 : (3*qmul)>>3;
3268     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3269     thres2= 2*thres1;
3270
3271     if(!bias){
3272         for(y=0; y<h; y++){
3273             for(x=0; x<w; x++){
3274                 int i= src[x + y*stride];
3275
3276                 if((unsigned)(i+thres1) > thres2){
3277                     if(i>=0){
3278                         i<<= QEXPSHIFT;
3279                         i/= qmul; //FIXME optimize
3280                         dst[x + y*stride]=  i;
3281                     }else{
3282                         i= -i;
3283                         i<<= QEXPSHIFT;
3284                         i/= qmul; //FIXME optimize
3285                         dst[x + y*stride]= -i;
3286                     }
3287                 }else
3288                     dst[x + y*stride]= 0;
3289             }
3290         }
3291     }else{
3292         for(y=0; y<h; y++){
3293             for(x=0; x<w; x++){
3294                 int i= src[x + y*stride];
3295
3296                 if((unsigned)(i+thres1) > thres2){
3297                     if(i>=0){
3298                         i<<= QEXPSHIFT;
3299                         i= (i + bias) / qmul; //FIXME optimize
3300                         dst[x + y*stride]=  i;
3301                     }else{
3302                         i= -i;
3303                         i<<= QEXPSHIFT;
3304                         i= (i + bias) / qmul; //FIXME optimize
3305                         dst[x + y*stride]= -i;
3306                     }
3307                 }else
3308                     dst[x + y*stride]= 0;
3309             }
3310         }
3311     }
3312 }
3313
3314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3315     const int w= b->width;
3316     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3317     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3318     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3319     int x,y;
3320
3321     if(s->qlog == LOSSLESS_QLOG) return;
3322
3323     for(y=start_y; y<end_y; y++){
3324 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3325         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3326         for(x=0; x<w; x++){
3327             int i= line[x];
3328             if(i<0){
3329                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3330             }else if(i>0){
3331                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3332             }
3333         }
3334     }
3335 }
3336
3337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3338     const int w= b->width;
3339     const int h= b->height;
3340     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3341     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3342     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3343     int x,y;
3344
3345     if(s->qlog == LOSSLESS_QLOG) return;
3346
3347     for(y=0; y<h; y++){
3348         for(x=0; x<w; x++){
3349             int i= src[x + y*stride];
3350             if(i<0){
3351                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3352             }else if(i>0){
3353                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3354             }
3355         }
3356     }
3357 }
3358
3359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3360     const int w= b->width;
3361     const int h= b->height;
3362     int x,y;
3363
3364     for(y=h-1; y>=0; y--){
3365         for(x=w-1; x>=0; x--){
3366             int i= x + y*stride;
3367
3368             if(x){
3369                 if(use_median){
3370                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3371                     else  src[i] -= src[i - 1];
3372                 }else{
3373                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3374                     else  src[i] -= src[i - 1];
3375                 }
3376             }else{
3377                 if(y) src[i] -= src[i - stride];
3378             }
3379         }
3380     }
3381 }
3382
3383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3384     const int w= b->width;
3385     int x,y;
3386
3387     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3388     IDWTELEM * prev;
3389
3390     if (start_y != 0)
3391         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3392
3393     for(y=start_y; y<end_y; y++){
3394         prev = line;
3395 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3396         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3397         for(x=0; x<w; x++){
3398             if(x){
3399                 if(use_median){
3400                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3401                     else  line[x] += line[x - 1];
3402                 }else{
3403                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3404                     else  line[x] += line[x - 1];
3405                 }
3406             }else{
3407                 if(y) line[x] += prev[x];
3408             }
3409         }
3410     }
3411 }
3412
3413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3414     const int w= b->width;
3415     const int h= b->height;
3416     int x,y;
3417
3418     for(y=0; y<h; y++){
3419         for(x=0; x<w; x++){
3420             int i= x + y*stride;
3421
3422             if(x){
3423                 if(use_median){
3424                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3425                     else  src[i] += src[i - 1];
3426                 }else{
3427                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3428                     else  src[i] += src[i - 1];
3429                 }
3430             }else{
3431                 if(y) src[i] += src[i - stride];
3432             }
3433         }
3434     }
3435 }
3436
3437 static void encode_qlogs(SnowContext *s){
3438     int plane_index, level, orientation;
3439
3440     for(plane_index=0; plane_index<2; plane_index++){
3441         for(level=0; level<s->spatial_decomposition_count; level++){
3442             for(orientation=level ? 1:0; orientation<4; orientation++){
3443                 if(orientation==2) continue;
3444                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3445             }
3446         }
3447     }
3448 }
3449
3450 static void encode_header(SnowContext *s){
3451     int plane_index, i;
3452     uint8_t kstate[32];
3453
3454     memset(kstate, MID_STATE, sizeof(kstate));
3455
3456     put_rac(&s->c, kstate, s->keyframe);
3457     if(s->keyframe || s->always_reset){
3458         reset_contexts(s);
3459         s->last_spatial_decomposition_type=
3460         s->last_qlog=
3461         s->last_qbias=
3462         s->last_mv_scale=
3463         s->last_block_max_depth= 0;
3464         for(plane_index=0; plane_index<2; plane_index++){
3465             Plane *p= &s->plane[plane_index];
3466             p->last_htaps=0;
3467             p->last_diag_mc=0;
3468             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3469         }
3470     }
3471     if(s->keyframe){
3472         put_symbol(&s->c, s->header_state, s->version, 0);
3473         put_rac(&s->c, s->header_state, s->always_reset);
3474         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3475         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3476         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3477         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3478         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3479         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3480         put_rac(&s->c, s->header_state, s->spatial_scalability);
3481 //        put_rac(&s->c, s->header_state, s->rate_scalability);
3482         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3483
3484         encode_qlogs(s);
3485     }
3486
3487     if(!s->keyframe){
3488         int update_mc=0;
3489         for(plane_index=0; plane_index<2; plane_index++){
3490             Plane *p= &s->plane[plane_index];
3491             update_mc |= p->last_htaps   != p->htaps;
3492             update_mc |= p->last_diag_mc != p->diag_mc;
3493             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3494         }
3495         put_rac(&s->c, s->header_state, update_mc);
3496         if(update_mc){
3497             for(plane_index=0; plane_index<2; plane_index++){
3498                 Plane *p= &s->plane[plane_index];
3499                 put_rac(&s->c, s->header_state, p->diag_mc);
3500                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3501                 for(i= p->htaps/2; i; i--)
3502                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3503             }
3504         }
3505         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3506             put_rac(&s->c, s->header_state, 1);
3507             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3508             encode_qlogs(s);
3509         }else
3510             put_rac(&s->c, s->header_state, 0);
3511     }
3512
3513     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3514     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3515     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3516     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3517     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3518
3519 }
3520
3521 static void update_last_header_values(SnowContext *s){
3522     int plane_index;
3523
3524     if(!s->keyframe){
3525         for(plane_index=0; plane_index<2; plane_index++){
3526             Plane *p= &s->plane[plane_index];
3527             p->last_diag_mc= p->diag_mc;
3528             p->last_htaps  = p->htaps;
3529             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3530         }
3531     }
3532
3533     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3534     s->last_qlog                        = s->qlog;
3535     s->last_qbias                       = s->qbias;
3536     s->last_mv_scale                    = s->mv_scale;
3537     s->last_block_max_depth             = s->block_max_depth;
3538     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3539 }
3540
3541 static void decode_qlogs(SnowContext *s){
3542     int plane_index, level, orientation;
3543
3544     for(plane_index=0; plane_index<3; plane_index++){
3545         for(level=0; level<s->spatial_decomposition_count; level++){
3546             for(orientation=level ? 1:0; orientation<4; orientation++){
3547                 int q;
3548                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3549                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3550                 else                    q= get_symbol(&s->c, s->header_state, 1);
3551                 s->plane[plane_index].band[level][orientation].qlog= q;
3552             }
3553         }
3554     }
3555 }
3556
3557 #define GET_S(dst, check) \
3558     tmp= get_symbol(&s->c, s->header_state, 0);\
3559     if(!(check)){\
3560         av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
3561         return -1;\
3562     }\
3563     dst= tmp;
3564
3565 static int decode_header(SnowContext *s){
3566     int plane_index, tmp;
3567     uint8_t kstate[32];
3568
3569     memset(kstate, MID_STATE, sizeof(kstate));
3570
3571     s->keyframe= get_rac(&s->c, kstate);
3572     if(s->keyframe || s->always_reset){
3573         reset_contexts(s);
3574         s->spatial_decomposition_type=
3575         s->qlog=
3576         s->qbias=
3577         s->mv_scale=
3578         s->block_max_depth= 0;
3579     }
3580     if(s->keyframe){
3581         GET_S(s->version, tmp <= 0U)
3582         s->always_reset= get_rac(&s->c, s->header_state);
3583         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3584         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3585         GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3586         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3587         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3588         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3589         s->spatial_scalability= get_rac(&s->c, s->header_state);
3590 //        s->rate_scalability= get_rac(&s->c, s->header_state);
3591         GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3592         s->max_ref_frames++;
3593
3594         decode_qlogs(s);
3595     }
3596
3597     if(!s->keyframe){
3598         if(get_rac(&s->c, s->header_state)){
3599             for(plane_index=0; plane_index<2; plane_index++){
3600                 int htaps, i, sum=0;
3601                 Plane *p= &s->plane[plane_index];
3602                 p->diag_mc= get_rac(&s->c, s->header_state);
3603                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3604                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3605                     return -1;
3606                 p->htaps= htaps;
3607                 for(i= htaps/2; i; i--){
3608                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3609                     sum += p->hcoeff[i];
3610                 }
3611                 p->hcoeff[0]= 32-sum;
3612             }
3613             s->plane[2].diag_mc= s->plane[1].diag_mc;
3614             s->plane[2].htaps  = s->plane[1].htaps;
3615             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3616         }
3617         if(get_rac(&s->c, s->header_state)){
3618             GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3619             decode_qlogs(s);
3620         }
3621     }
3622
3623     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3624     if(s->spatial_decomposition_type > 1U){
3625         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3626         return -1;
3627     }
3628     if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3629              s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3630         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
3631         return -1;
3632     }
3633
3634     s->qlog           += get_symbol(&s->c, s->header_state, 1);
3635     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3636     s->qbias          += get_symbol(&s->c, s->header_state, 1);
3637     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3638     if(s->block_max_depth > 1 || s->block_max_depth < 0){
3639         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3640         s->block_max_depth= 0;
3641         return -1;
3642     }
3643
3644     return 0;
3645 }
3646
3647 static void init_qexp(void){
3648     int i;
3649     double v=128;
3650
3651     for(i=0; i<QROOT; i++){
3652         qexp[i]= lrintf(v);
3653         v *= pow(2, 1.0 / QROOT);
3654     }
3655 }
3656
3657 static av_cold int common_init(AVCodecContext *avctx){
3658     SnowContext *s = avctx->priv_data;
3659     int width, height;
3660     int i, j;
3661
3662     s->avctx= avctx;
3663     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
3664
3665     dsputil_init(&s->dsp, avctx);
3666
3667 #define mcf(dx,dy)\
3668     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3669     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3670         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3671     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3672     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3673         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3674
3675     mcf( 0, 0)
3676     mcf( 4, 0)
3677     mcf( 8, 0)
3678     mcf(12, 0)
3679     mcf( 0, 4)
3680     mcf( 4, 4)
3681     mcf( 8, 4)
3682     mcf(12, 4)
3683     mcf( 0, 8)
3684     mcf( 4, 8)
3685     mcf( 8, 8)
3686     mcf(12, 8)
3687     mcf( 0,12)
3688     mcf( 4,12)
3689     mcf( 8,12)
3690     mcf(12,12)
3691
3692 #define mcfh(dx,dy)\
3693     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3694     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3695         mc_block_hpel ## dx ## dy ## 16;\
3696     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3697     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3698         mc_block_hpel ## dx ## dy ## 8;
3699
3700     mcfh(0, 0)
3701     mcfh(8, 0)
3702     mcfh(0, 8)
3703     mcfh(8, 8)
3704
3705     if(!qexp[0])
3706         init_qexp();
3707
3708 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3709
3710     width= s->avctx->width;
3711     height= s->avctx->height;
3712
3713     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3714     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3715
3716     for(i=0; i<MAX_REF_FRAMES; i++)
3717         for(j=0; j<MAX_REF_FRAMES; j++)
3718             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3719
3720     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3721     s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3722
3723     return 0;
3724 }
3725
3726 static int common_init_after_header(AVCodecContext *avctx){
3727     SnowContext *s = avctx->priv_data;
3728     int plane_index, level, orientation;
3729
3730     for(plane_index=0; plane_index<3; plane_index++){
3731         int w= s->avctx->width;
3732         int h= s->avctx->height;
3733
3734         if(plane_index){
3735             w>>= s->chroma_h_shift;
3736             h>>= s->chroma_v_shift;
3737         }
3738         s->plane[plane_index].width = w;
3739         s->plane[plane_index].height= h;
3740
3741         for(level=s->spatial_decomposition_count-1; level>=0; level--){
3742             for(orientation=level ? 1 : 0; orientation<4; orientation++){
3743                 SubBand *b= &s->plane[plane_index].band[level][orientation];
3744
3745                 b->buf= s->spatial_dwt_buffer;
3746                 b->level= level;
3747                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3748                 b->width = (w + !(orientation&1))>>1;
3749                 b->height= (h + !(orientation>1))>>1;
3750
3751                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3752                 b->buf_x_offset = 0;
3753                 b->buf_y_offset = 0;
3754
3755                 if(orientation&1){
3756                     b->buf += (w+1)>>1;
3757                     b->buf_x_offset = (w+1)>>1;
3758                 }
3759                 if(orientation>1){
3760                     b->buf += b->stride>>1;
3761                     b->buf_y_offset = b->stride_line >> 1;
3762                 }
3763                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3764
3765                 if(level)
3766                     b->parent= &s->plane[plane_index].band[level-1][orientation];
3767                 //FIXME avoid this realloc
3768                 av_freep(&b->x_coeff);
3769                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3770             }
3771             w= (w+1)>>1;
3772             h= (h+1)>>1;
3773         }
3774     }
3775
3776     return 0;
3777 }
3778
3779 static int qscale2qlog(int qscale){
3780     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3781            + 61*QROOT/8; //<64 >60
3782 }
3783
3784 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3785 {
3786     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3787      * FIXME we know exact mv bits at this point,
3788      * but ratecontrol isn't set up to include them. */
3789     uint32_t coef_sum= 0;
3790     int level, orientation, delta_qlog;
3791
3792     for(level=0; level<s->spatial_decomposition_count; level++){
3793         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3794             SubBand *b= &s->plane[0].band[level][orientation];
3795             IDWTELEM *buf= b->ibuf;
3796             const int w= b->width;
3797             const int h= b->height;
3798             const int stride= b->stride;
3799             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3800             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3801             const int qdiv= (1<<16)/qmul;
3802             int x, y;
3803             //FIXME this is ugly
3804             for(y=0; y<h; y++)
3805                 for(x=0; x<w; x++)
3806                     buf[x+y*stride]= b->buf[x+y*stride];
3807             if(orientation==0)
3808                 decorrelate(s, b, buf, stride, 1, 0);
3809             for(y=0; y<h; y++)
3810                 for(x=0; x<w; x++)
3811                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3812         }
3813     }
3814
3815     /* ugly, ratecontrol just takes a sqrt again */
3816     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3817     assert(coef_sum < INT_MAX);
3818
3819     if(pict->pict_type == FF_I_TYPE){
3820         s->m.current_picture.mb_var_sum= coef_sum;
3821         s->m.current_picture.mc_mb_var_sum= 0;
3822     }else{
3823         s->m.current_picture.mc_mb_var_sum= coef_sum;
3824         s->m.current_picture.mb_var_sum= 0;
3825     }
3826
3827     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3828     if (pict->quality < 0)
3829         return INT_MIN;
3830     s->lambda= pict->quality * 3/2;
3831     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3832     s->qlog+= delta_qlog;
3833     return delta_qlog;
3834 }
3835
3836 static void calculate_visual_weight(SnowContext *s, Plane *p){
3837     int width = p->width;
3838     int height= p->height;
3839     int level, orientation, x, y;
3840
3841     for(level=0; level<s->spatial_decomposition_count; level++){
3842         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3843             SubBand *b= &p->band[level][orientation];
3844             IDWTELEM *ibuf= b->ibuf;
3845             int64_t error=0;
3846
3847             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3848             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3849             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3850             for(y=0; y<height; y++){
3851                 for(x=0; x<width; x++){
3852                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3853                     error += d*d;
3854                 }
3855             }
3856
3857             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3858         }
3859     }
3860 }
3861
3862 #define QUANTIZE2 0
3863
3864 #if QUANTIZE2==1
3865 #define Q2_STEP 8
3866
3867 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3868     SubBand *b= &p->band[level][orientation];
3869     int x, y;
3870     int xo=0;
3871     int yo=0;
3872     int step= 1 << (s->spatial_decomposition_count - level);
3873
3874     if(orientation&1)
3875         xo= step>>1;
3876     if(orientation&2)
3877         yo= step>>1;
3878
3879     //FIXME bias for nonzero ?
3880     //FIXME optimize
3881     memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3882     for(y=0; y<p->height; y++){
3883         for(x=0; x<p->width; x++){
3884             int sx= (x-xo + step/2) / step / Q2_STEP;
3885             int sy= (y-yo + step/2) / step / Q2_STEP;
3886             int v= r0[x + y*p->width] - r1[x + y*p->width];
3887             assert(sx>=0 && sy>=0 && sx < score_stride);
3888             v= ((v+8)>>4)<<4;
3889             score[sx + sy*score_stride] += v*v;
3890             assert(score[sx + sy*score_stride] >= 0);
3891         }
3892     }
3893 }
3894
3895 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3896     int level, orientation;
3897
3898     for(level=0; level<s->spatial_decomposition_count; level++){
3899         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3900             SubBand *b= &p->band[level][orientation];
3901             IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3902
3903             dequantize(s, b, dst, b->stride);
3904         }
3905     }
3906 }
3907
3908 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3909     int level, orientation, ys, xs, x, y, pass;
3910     IDWTELEM best_dequant[height * stride];
3911     IDWTELEM idwt2_buffer[height * stride];
3912     const int score_stride= (width + 10)/Q2_STEP;
3913     int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3914     int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3915     int threshold= (s->m.lambda * s->m.lambda) >> 6;
3916
3917     //FIXME pass the copy cleanly ?
3918
3919 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3920     ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3921
3922     for(level=0; level<s->spatial_decomposition_count; level++){
3923         for(orientation=level ? 1 : 0; orientation<4; orientation++){
3924             SubBand *b= &p->band[level][orientation];
3925             IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3926              DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3927             assert(src == b->buf); // code does not depend on this but it is true currently
3928
3929             quantize(s, b, dst, src, b->stride, s->qbias);
3930         }
3931     }
3932     for(pass=0; pass<1; pass++){
3933         if(s->qbias == 0) //keyframe
3934             continue;
3935         for(level=0; level<s->spatial_decomposition_count; level++){
3936             for(orientation=level ? 1 : 0; orientation<4; orientation++){
3937                 SubBand *b= &p->band[level][orientation];
3938                 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3939                 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3940
3941                 for(ys= 0; ys<Q2_STEP; ys++){
3942                     for(xs= 0; xs<Q2_STEP; xs++){
3943                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3944                         dequantize_all(s, p, idwt2_buffer, width, height);
3945                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3946                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3947                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3948                         for(y=ys; y<b->height; y+= Q2_STEP){
3949                             for(x=xs; x<b->width; x+= Q2_STEP){
3950                                 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3951                                 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3952                                 //FIXME try more than just --
3953                             }
3954                         }
3955                         dequantize_all(s, p, idwt2_buffer, width, height);
3956                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3957                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3958                         for(y=ys; y<b->height; y+= Q2_STEP){
3959                             for(x=xs; x<b->width; x+= Q2_STEP){
3960                                 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3961                                 if(score[score_idx] <= best_score[score_idx] + threshold){
3962                                     best_score[score_idx]= score[score_idx];
3963                                     if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3964                                     if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3965                                     //FIXME copy instead
3966                                 }
3967                             }
3968                         }
3969                     }
3970                 }
3971             }
3972         }
3973     }
3974     memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3975 }
3976
3977 #endif /* QUANTIZE2==1 */
3978
3979 static av_cold int encode_init(AVCodecContext *avctx)
3980 {
3981     SnowContext *s = avctx->priv_data;
3982     int plane_index;
3983
3984     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3985         av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3986                "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3987         return -1;
3988     }
3989
3990     if(avctx->prediction_method == DWT_97
3991        && (avctx->flags & CODEC_FLAG_QSCALE)
3992        && avctx->global_quality == 0){
3993         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3994         return -1;
3995     }
3996
3997     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3998
3999     s->chroma_h_shift= 1; //FIXME XXX
4000     s->chroma_v_shift= 1;
4001
4002     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
4003     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
4004
4005     for(plane_index=0; plane_index<3; plane_index++){
4006         s->plane[plane_index].diag_mc= 1;
4007         s->plane[plane_index].htaps= 6;
4008         s->plane[plane_index].hcoeff[0]=  40;
4009         s->plane[plane_index].hcoeff[1]= -10;
4010         s->plane[plane_index].hcoeff[2]=   2;
4011         s->plane[plane_index].fast_mc= 1;
4012     }
4013
4014     common_init(avctx);
4015     alloc_blocks(s);
4016
4017     s->version=0;
4018
4019     s->m.avctx   = avctx;
4020     s->m.flags   = avctx->flags;
4021     s->m.bit_rate= avctx->bit_rate;
4022
4023     s->m.me.temp      =
4024     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4025     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4026     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4027     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4028     h263_encode_init(&s->m); //mv_penalty
4029
4030     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4031
4032     if(avctx->flags&CODEC_FLAG_PASS1){
4033         if(!avctx->stats_out)
4034             avctx->stats_out = av_mallocz(256);
4035     }
4036     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4037         if(ff_rate_control_init(&s->m) < 0)
4038             return -1;
4039     }
4040     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4041
4042     avctx->coded_frame= &s->current_picture;
4043     switch(avctx->pix_fmt){
4044 //    case PIX_FMT_YUV444P:
4045 //    case PIX_FMT_YUV422P:
4046     case PIX_FMT_YUV420P:
4047     case PIX_FMT_GRAY8:
4048 //    case PIX_FMT_YUV411P:
4049 //    case PIX_FMT_YUV410P:
4050         s->colorspace_type= 0;
4051         break;
4052 /*    case PIX_FMT_RGB32:
4053         s->colorspace= 1;
4054         break;*/
4055     default:
4056         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4057         return -1;
4058     }
4059 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4060     s->chroma_h_shift= 1;
4061     s->chroma_v_shift= 1;
4062
4063     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4064     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4065
4066     s->avctx->get_buffer(s->avctx, &s->input_picture);
4067
4068     if(s->avctx->me_method == ME_ITER){
4069         int i;
4070         int size= s->b_width * s->b_height << 2*s->block_max_depth;
4071         for(i=0; i<s->max_ref_frames; i++){
4072             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4073             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4074         }
4075     }
4076
4077     return 0;
4078 }
4079
4080 #define USE_HALFPEL_PLANE 0
4081
4082 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4083     int p,x,y;
4084
4085     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4086
4087     for(p=0; p<3; p++){
4088         int is_chroma= !!p;
4089         int w= s->avctx->width  >>is_chroma;
4090         int h= s->avctx->height >>is_chroma;
4091         int ls= frame->linesize[p];
4092         uint8_t *src= frame->data[p];
4093
4094         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4095         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4096         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4097
4098         halfpel[0][p]= src;
4099         for(y=0; y<h; y++){
4100             for(x=0; x<w; x++){
4101                 int i= y*ls + x;
4102
4103                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4104             }
4105         }
4106         for(y=0; y<h; y++){
4107             for(x=0; x<w; x++){
4108                 int i= y*ls + x;
4109
4110                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4111             }
4112         }
4113         src= halfpel[1][p];
4114         for(y=0; y<h; y++){
4115             for(x=0; x<w; x++){
4116                 int i= y*ls + x;
4117
4118                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4119             }
4120         }
4121
4122 //FIXME border!
4123     }
4124 }
4125
4126 static void release_buffer(AVCodecContext *avctx){
4127     SnowContext *s = avctx->priv_data;
4128     int i;
4129
4130     if(s->last_picture[s->max_ref_frames-1].data[0]){
4131         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4132         for(i=0; i<9; i++)
4133             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4134                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4135     }
4136 }
4137
4138 static int frame_start(SnowContext *s){
4139    AVFrame tmp;
4140    int w= s->avctx->width; //FIXME round up to x16 ?
4141    int h= s->avctx->height;
4142
4143     if(s->current_picture.data[0]){
4144         s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4145         s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4146         s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4147     }
4148
4149     tmp= s->last_picture[s->max_ref_frames-1];
4150     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4151     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4152     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4153         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4154     s->last_picture[0]= s->current_picture;
4155     s->current_picture= tmp;
4156
4157     if(s->keyframe){
4158         s->ref_frames= 0;
4159     }else{
4160         int i;
4161         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4162             if(i && s->last_picture[i-1].key_frame)
4163                 break;
4164         s->ref_frames= i;
4165         if(s->ref_frames==0){
4166             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
4167             return -1;
4168         }
4169     }
4170
4171     s->current_picture.reference= 1;
4172     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4173         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4174         return -1;
4175     }
4176
4177     s->current_picture.key_frame= s->keyframe;
4178
4179     return 0;
4180 }
4181
4182 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4183     SnowContext *s = avctx->priv_data;
4184     RangeCoder * const c= &s->c;
4185     AVFrame *pict = data;
4186     const int width= s->avctx->width;
4187     const int height= s->avctx->height;
4188     int level, orientation, plane_index, i, y;
4189     uint8_t rc_header_bak[sizeof(s->header_state)];
4190     uint8_t rc_block_bak[sizeof(s->block_state)];
4191
4192     ff_init_range_encoder(c, buf, buf_size);
4193     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4194
4195     for(i=0; i<3; i++){
4196         int shift= !!i;
4197         for(y=0; y<(height>>shift); y++)
4198             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4199                    &pict->data[i][y * pict->linesize[i]],
4200                    width>>shift);
4201     }
4202     s->new_picture = *pict;
4203
4204     s->m.picture_number= avctx->frame_number;
4205     if(avctx->flags&CODEC_FLAG_PASS2){
4206         s->m.pict_type =
4207         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4208         s->keyframe= pict->pict_type==FF_I_TYPE;
4209         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4210             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4211             if (pict->quality < 0)
4212                 return -1;
4213         }
4214     }else{
4215         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4216         s->m.pict_type=
4217         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4218     }
4219
4220     if(s->pass1_rc && avctx->frame_number == 0)
4221         pict->quality= 2*FF_QP2LAMBDA;
4222     if(pict->quality){
4223         s->qlog= qscale2qlog(pict->quality);
4224         s->lambda = pict->quality * 3/2;
4225     }
4226     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4227         s->qlog= LOSSLESS_QLOG;
4228         s->lambda = 0;
4229     }//else keep previous frame's qlog until after motion estimation
4230
4231     frame_start(s);
4232
4233     s->m.current_picture_ptr= &s->m.current_picture;
4234     if(pict->pict_type == FF_P_TYPE){
4235         int block_width = (width +15)>>4;
4236         int block_height= (height+15)>>4;
4237         int stride= s->current_picture.linesize[0];
4238
4239         assert(s->current_picture.data[0]);
4240         assert(s->last_picture[0].data[0]);
4241
4242         s->m.avctx= s->avctx;
4243         s->m.current_picture.data[0]= s->current_picture.data[0];
4244         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
4245         s->m.    new_picture.data[0]= s->  input_picture.data[0];
4246         s->m.   last_picture_ptr= &s->m.   last_picture;
4247         s->m.linesize=
4248         s->m.   last_picture.linesize[0]=
4249         s->m.    new_picture.linesize[0]=
4250         s->m.current_picture.linesize[0]= stride;
4251         s->m.uvlinesize= s->current_picture.linesize[1];
4252         s->m.width = width;
4253         s->m.height= height;
4254         s->m.mb_width = block_width;
4255         s->m.mb_height= block_height;
4256         s->m.mb_stride=   s->m.mb_width+1;
4257         s->m.b8_stride= 2*s->m.mb_width+1;
4258         s->m.f_code=1;
4259         s->m.pict_type= pict->pict_type;
4260         s->m.me_method= s->avctx->me_method;
4261         s->m.me.scene_change_score=0;
4262         s->m.flags= s->avctx->flags;
4263         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4264         s->m.out_format= FMT_H263;
4265         s->m.unrestricted_mv= 1;
4266
4267         s->m.lambda = s->lambda;
4268         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4269         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4270
4271         s->m.dsp= s->dsp; //move
4272         ff_init_me(&s->m);
4273         s->dsp= s->m.dsp;
4274     }
4275
4276     if(s->pass1_rc){
4277         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4278         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4279     }
4280
4281 redo_frame:
4282
4283     if(pict->pict_type == FF_I_TYPE)
4284         s->spatial_decomposition_count= 5;
4285     else
4286         s->spatial_decomposition_count= 5;
4287
4288     s->m.pict_type = pict->pict_type;
4289     s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4290
4291     common_init_after_header(avctx);
4292
4293     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4294         for(plane_index=0; plane_index<3; plane_index++){
4295             calculate_visual_weight(s, &s->plane[plane_index]);
4296         }
4297     }
4298
4299     encode_header(s);
4300     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4301     encode_blocks(s, 1);
4302     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4303
4304     for(plane_index=0; plane_index<3; plane_index++){
4305         Plane *p= &s->plane[plane_index];
4306         int w= p->width;
4307         int h= p->height;
4308         int x, y;
4309 //        int bits= put_bits_count(&s->c.pb);
4310
4311         if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4312             //FIXME optimize
4313             if(pict->data[plane_index]) //FIXME gray hack
4314                 for(y=0; y<h; y++){
4315                     for(x=0; x<w; x++){
4316                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4317                     }
4318                 }
4319             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4320
4321             if(   plane_index==0
4322                && pict->pict_type == FF_P_TYPE
4323                && !(avctx->flags&CODEC_FLAG_PASS2)
4324                && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4325                 ff_init_range_encoder(c, buf, buf_size);
4326                 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4327                 pict->pict_type= FF_I_TYPE;
4328                 s->keyframe=1;
4329                 s->current_picture.key_frame=1;
4330                 goto redo_frame;
4331             }
4332
4333             if(s->qlog == LOSSLESS_QLOG){
4334                 for(y=0; y<h; y++){
4335                     for(x=0; x<w; x++){
4336                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4337                     }
4338                 }
4339             }else{
4340                 for(y=0; y<h; y++){
4341                     for(x=0; x<w; x++){
4342                         s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4343                     }
4344                 }
4345             }
4346
4347             /*  if(QUANTIZE2)
4348                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4349             else*/
4350                 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4351
4352             if(s->pass1_rc && plane_index==0){
4353                 int delta_qlog = ratecontrol_1pass(s, pict);
4354                 if (delta_qlog <= INT_MIN)
4355                     return -1;
4356                 if(delta_qlog){
4357                     //reordering qlog in the bitstream would eliminate this reset
4358                     ff_init_range_encoder(c, buf, buf_size);
4359                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4360                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4361                     encode_header(s);
4362                     encode_blocks(s, 0);
4363                 }
4364             }
4365
4366             for(level=0; level<s->spatial_decomposition_count; level++){
4367                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4368                     SubBand *b= &p->band[level][orientation];
4369
4370                     if(!QUANTIZE2)
4371                         quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4372                     if(orientation==0)
4373                         decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4374                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4375                     assert(b->parent==NULL || b->parent->stride == b->stride*2);
4376                     if(orientation==0)
4377                         correlate(s, b, b->ibuf, b->stride, 1, 0);
4378                 }
4379             }
4380
4381             for(level=0; level<s->spatial_decomposition_count; level++){
4382                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4383                     SubBand *b= &p->band[level][orientation];
4384
4385                     dequantize(s, b, b->ibuf, b->stride);
4386                 }
4387             }
4388
4389             ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4390             if(s->qlog == LOSSLESS_QLOG){
4391                 for(y=0; y<h; y++){
4392                     for(x=0; x<w; x++){
4393                         s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4394                     }
4395                 }
4396             }
4397             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4398         }else{
4399             //ME/MC only
4400             if(pict->pict_type == FF_I_TYPE){
4401                 for(y=0; y<h; y++){
4402                     for(x=0; x<w; x++){
4403                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4404                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
4405                     }
4406                 }
4407             }else{
4408                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4409                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4410             }
4411         }
4412         if(s->avctx->flags&CODEC_FLAG_PSNR){
4413             int64_t error= 0;
4414
4415             if(pict->data[plane_index]) //FIXME gray hack
4416                 for(y=0; y<h; y++){
4417                     for(x=0; x<w; x++){
4418                         int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4419                         error += d*d;
4420                     }
4421                 }
4422             s->avctx->error[plane_index] += error;
4423             s->current_picture.error[plane_index] = error;
4424         }
4425
4426     }
4427
4428     update_last_header_values(s);
4429
4430     release_buffer(avctx);
4431
4432     s->current_picture.coded_picture_number = avctx->frame_number;
4433     s->current_picture.pict_type = pict->pict_type;
4434     s->current_picture.quality = pict->quality;
4435     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4436     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4437     s->m.current_picture.display_picture_number =
4438     s->m.current_picture.coded_picture_number = avctx->frame_number;
4439     s->m.current_picture.quality = pict->quality;
4440     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4441     if(s->pass1_rc)
4442         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4443             return -1;
4444     if(avctx->flags&CODEC_FLAG_PASS1)
4445         ff_write_pass1_stats(&s->m);
4446     s->m.last_pict_type = s->m.pict_type;
4447     avctx->frame_bits = s->m.frame_bits;
4448     avctx->mv_bits = s->m.mv_bits;
4449     avctx->misc_bits = s->m.misc_bits;
4450     avctx->p_tex_bits = s->m.p_tex_bits;
4451
4452     emms_c();
4453
4454     return ff_rac_terminate(c);
4455 }
4456
4457 static av_cold void common_end(SnowContext *s){
4458     int plane_index, level, orientation, i;
4459
4460     av_freep(&s->spatial_dwt_buffer);
4461     av_freep(&s->spatial_idwt_buffer);
4462
4463     s->m.me.temp= NULL;
4464     av_freep(&s->m.me.scratchpad);
4465     av_freep(&s->m.me.map);
4466     av_freep(&s->m.me.score_map);
4467     av_freep(&s->m.obmc_scratchpad);
4468
4469     av_freep(&s->block);
4470     av_freep(&s->scratchbuf);
4471
4472     for(i=0; i<MAX_REF_FRAMES; i++){
4473         av_freep(&s->ref_mvs[i]);
4474         av_freep(&s->ref_scores[i]);
4475         if(s->last_picture[i].data[0])
4476             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4477     }
4478
4479     for(plane_index=0; plane_index<3; plane_index++){
4480         for(level=s->spatial_decomposition_count-1; level>=0; level--){
4481             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4482                 SubBand *b= &s->plane[plane_index].band[level][orientation];
4483
4484                 av_freep(&b->x_coeff);
4485             }
4486         }
4487     }
4488 }
4489
4490 static av_cold int encode_end(AVCodecContext *avctx)
4491 {
4492     SnowContext *s = avctx->priv_data;
4493
4494     common_end(s);
4495     av_free(avctx->stats_out);
4496
4497     return 0;
4498 }
4499
4500 static av_cold int decode_init(AVCodecContext *avctx)
4501 {
4502     avctx->pix_fmt= PIX_FMT_YUV420P;
4503
4504     common_init(avctx);
4505
4506     return 0;
4507 }
4508
4509 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
4510     const uint8_t *buf = avpkt->data;
4511     int buf_size = avpkt->size;
4512     SnowContext *s = avctx->priv_data;
4513     RangeCoder * const c= &s->c;
4514     int bytes_read;
4515     AVFrame *picture = data;
4516     int level, orientation, plane_index;
4517
4518     ff_init_range_decoder(c, buf, buf_size);
4519     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4520
4521     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4522     if(decode_header(s)<0)
4523         return -1;
4524     common_init_after_header(avctx);
4525
4526     // realloc slice buffer for the case that spatial_decomposition_count changed
4527     slice_buffer_destroy(&s->sb);
4528     slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4529
4530     for(plane_index=0; plane_index<3; plane_index++){
4531         Plane *p= &s->plane[plane_index];
4532         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4533                                               && p->hcoeff[1]==-10
4534                                               && p->hcoeff[2]==2;
4535     }
4536
4537     alloc_blocks(s);
4538
4539     if(frame_start(s) < 0)
4540         return -1;
4541     //keyframe flag duplication mess FIXME
4542     if(avctx->debug&FF_DEBUG_PICT_INFO)
4543         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4544
4545     decode_blocks(s);
4546
4547     for(plane_index=0; plane_index<3; plane_index++){
4548         Plane *p= &s->plane[plane_index];
4549         int w= p->width;
4550         int h= p->height;
4551         int x, y;
4552         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4553
4554         if(s->avctx->debug&2048){
4555             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4556             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4557
4558             for(y=0; y<h; y++){
4559                 for(x=0; x<w; x++){
4560                     int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4561                     s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4562                 }
4563             }
4564         }
4565
4566         {
4567         for(level=0; level<s->spatial_decomposition_count; level++){
4568             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4569                 SubBand *b= &p->band[level][orientation];
4570                 unpack_coeffs(s, b, b->parent, orientation);
4571             }
4572         }
4573         }
4574
4575         {
4576         const int mb_h= s->b_height << s->block_max_depth;
4577         const int block_size = MB_SIZE >> s->block_max_depth;
4578         const int block_w    = plane_index ? block_size/2 : block_size;
4579         int mb_y;
4580         DWTCompose cs[MAX_DECOMPOSITIONS];
4581         int yd=0, yq=0;
4582         int y;
4583         int end_y;
4584
4585         ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4586         for(mb_y=0; mb_y<=mb_h; mb_y++){
4587
4588             int slice_starty = block_w*mb_y;
4589             int slice_h = block_w*(mb_y+1);
4590             if (!(s->keyframe || s->avctx->debug&512)){
4591                 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4592                 slice_h -= (block_w >> 1);
4593             }
4594
4595             for(level=0; level<s->spatial_decomposition_count; level++){
4596                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4597                     SubBand *b= &p->band[level][orientation];
4598                     int start_y;
4599                     int end_y;
4600                     int our_mb_start = mb_y;
4601                     int our_mb_end = (mb_y + 1);
4602                     const int extra= 3;
4603                     start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4604                     end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4605                     if (!(s->keyframe || s->avctx->debug&512)){
4606                         start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4607                         end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4608                     }
4609                     start_y = FFMIN(b->height, start_y);
4610                     end_y = FFMIN(b->height, end_y);
4611
4612                     if (start_y != end_y){
4613                         if (orientation == 0){
4614                             SubBand * correlate_band = &p->band[0][0];
4615                             int correlate_end_y = FFMIN(b->height, end_y + 1);
4616                             int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4617                             decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4618                             correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4619                             dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4620                         }
4621                         else
4622                             decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4623                     }
4624                 }
4625             }
4626
4627             for(; yd<slice_h; yd+=4){
4628                 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4629             }
4630
4631             if(s->qlog == LOSSLESS_QLOG){
4632                 for(; yq<slice_h && yq<h; yq++){
4633                     IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4634                     for(x=0; x<w; x++){
4635                         line[x] <<= FRAC_BITS;
4636                     }
4637                 }
4638             }
4639
4640             predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4641
4642             y = FFMIN(p->height, slice_starty);
4643             end_y = FFMIN(p->height, slice_h);
4644             while(y < end_y)
4645                 slice_buffer_release(&s->sb, y++);
4646         }
4647
4648         slice_buffer_flush(&s->sb);
4649         }
4650
4651     }
4652
4653     emms_c();
4654
4655     release_buffer(avctx);
4656
4657     if(!(s->avctx->debug&2048))
4658         *picture= s->current_picture;
4659     else
4660         *picture= s->mconly_picture;
4661
4662     *data_size = sizeof(AVFrame);
4663
4664     bytes_read= c->bytestream - c->bytestream_start;
4665     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4666
4667     return bytes_read;
4668 }
4669
4670 static av_cold int decode_end(AVCodecContext *avctx)
4671 {
4672     SnowContext *s = avctx->priv_data;
4673
4674     slice_buffer_destroy(&s->sb);
4675
4676     common_end(s);
4677
4678     return 0;
4679 }
4680
4681 AVCodec snow_decoder = {
4682     "snow",
4683     CODEC_TYPE_VIDEO,
4684     CODEC_ID_SNOW,
4685     sizeof(SnowContext),
4686     decode_init,
4687     NULL,
4688     decode_end,
4689     decode_frame,
4690     0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4691     NULL,
4692     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4693 };
4694
4695 #if CONFIG_SNOW_ENCODER
4696 AVCodec snow_encoder = {
4697     "snow",
4698     CODEC_TYPE_VIDEO,
4699     CODEC_ID_SNOW,
4700     sizeof(SnowContext),
4701     encode_init,
4702     encode_frame,
4703     encode_end,
4704     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4705 };
4706 #endif
4707
4708
4709 #ifdef TEST
4710 #undef malloc
4711 #undef free
4712 #undef printf
4713
4714 #include "libavutil/lfg.h"
4715
4716 int main(void){
4717     int width=256;
4718     int height=256;
4719     int buffer[2][width*height];
4720     SnowContext s;
4721     int i;
4722     AVLFG prn;
4723     s.spatial_decomposition_count=6;
4724     s.spatial_decomposition_type=1;
4725
4726     av_lfg_init(&prn, 1);
4727
4728     printf("testing 5/3 DWT\n");
4729     for(i=0; i<width*height; i++)
4730         buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4731
4732     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4733     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4734
4735     for(i=0; i<width*height; i++)
4736         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4737
4738     printf("testing 9/7 DWT\n");
4739     s.spatial_decomposition_type=0;
4740     for(i=0; i<width*height; i++)
4741         buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4742
4743     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4745
4746     for(i=0; i<width*height; i++)
4747         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4748
4749 #if 0
4750     printf("testing AC coder\n");
4751     memset(s.header_state, 0, sizeof(s.header_state));
4752     ff_init_range_encoder(&s.c, buffer[0], 256*256);
4753     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4754
4755     for(i=-256; i<256; i++){
4756         put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4757     }
4758     ff_rac_terminate(&s.c);
4759
4760     memset(s.header_state, 0, sizeof(s.header_state));
4761     ff_init_range_decoder(&s.c, buffer[0], 256*256);
4762     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4763
4764     for(i=-256; i<256; i++){
4765         int j;
4766         j= get_symbol(&s.c, s.header_state, 1);
4767         if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4768     }
4769 #endif
4770     {
4771     int level, orientation, x, y;
4772     int64_t errors[8][4];
4773     int64_t g=0;
4774
4775         memset(errors, 0, sizeof(errors));
4776         s.spatial_decomposition_count=3;
4777         s.spatial_decomposition_type=0;
4778         for(level=0; level<s.spatial_decomposition_count; level++){
4779             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4780                 int w= width  >> (s.spatial_decomposition_count-level);
4781                 int h= height >> (s.spatial_decomposition_count-level);
4782                 int stride= width  << (s.spatial_decomposition_count-level);
4783                 DWTELEM *buf= buffer[0];
4784                 int64_t error=0;
4785
4786                 if(orientation&1) buf+=w;
4787                 if(orientation>1) buf+=stride>>1;
4788
4789                 memset(buffer[0], 0, sizeof(int)*width*height);
4790                 buf[w/2 + h/2*stride]= 256*256;
4791                 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4792                 for(y=0; y<height; y++){
4793                     for(x=0; x<width; x++){
4794                         int64_t d= buffer[0][x + y*width];
4795                         error += d*d;
4796                         if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4797                     }
4798                     if(FFABS(height/2-y)<9 && level==2) printf("\n");
4799                 }
4800                 error= (int)(sqrt(error)+0.5);
4801                 errors[level][orientation]= error;
4802                 if(g) g=av_gcd(g, error);
4803                 else g= error;
4804             }
4805         }
4806         printf("static int const visual_weight[][4]={\n");
4807         for(level=0; level<s.spatial_decomposition_count; level++){
4808             printf("  {");
4809             for(orientation=0; orientation<4; orientation++){
4810                 printf("%8"PRId64",", errors[level][orientation]/g);
4811             }
4812             printf("},\n");
4813         }
4814         printf("};\n");
4815         {
4816             int level=2;
4817             int w= width  >> (s.spatial_decomposition_count-level);
4818             //int h= height >> (s.spatial_decomposition_count-level);
4819             int stride= width  << (s.spatial_decomposition_count-level);
4820             DWTELEM *buf= buffer[0];
4821             int64_t error=0;
4822
4823             buf+=w;
4824             buf+=stride>>1;
4825
4826             memset(buffer[0], 0, sizeof(int)*width*height);
4827 #if 1
4828             for(y=0; y<height; y++){
4829                 for(x=0; x<width; x++){
4830                     int tab[4]={0,2,3,1};
4831                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4832                 }
4833             }
4834             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4835 #else
4836             for(y=0; y<h; y++){
4837                 for(x=0; x<w; x++){
4838                     buf[x + y*stride  ]=169;
4839                     buf[x + y*stride-w]=64;
4840                 }
4841             }
4842             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4843 #endif
4844             for(y=0; y<height; y++){
4845                 for(x=0; x<width; x++){
4846                     int64_t d= buffer[0][x + y*width];
4847                     error += d*d;
4848                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4849                 }
4850                 if(FFABS(height/2-y)<9) printf("\n");
4851             }
4852         }
4853
4854     }
4855     return 0;
4856 }
4857 #endif /* TEST */