2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/*
 * 256-entry lookup table with outputs in {-1, 0, 1}.
 * Entries 0-1 are 0, entries 2-127 are 1, entries 128-254 are -1 and
 * entry 255 is 0; i.e. reading the index as a two's-complement signed byte
 * x, the result is 0 for |x| <= 1 and the sign of x otherwise.
 * NOTE(review): no user of this table is visible in this part of the file.
 */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/*
 * 256-entry lookup table with outputs in {-1, 0, 1}.
 * Entry 0 is 0, entries 1-127 are 1 and entries 128-255 are -1, which is
 * the sign of the index when it is read as a two's-complement signed byte.
 */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/*
 * 256-entry lookup table with outputs in {-1, 0, 1}.
 * Entries 0 and 1 are 0; from entry 2 onward the value alternates, with
 * even indices mapping to 1 and odd indices mapping to -1.
 */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/*
 * 256-entry lookup table with outputs in -2..2.
 * Entry 0 is 0, entries 1-3 are 1, entries 4-127 are 2, entries 128-252
 * are -2 and entries 253-255 are -1. Reading the index as a signed byte,
 * the negative half is the negated mirror image of the positive half.
 */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/*
 * 256-entry lookup table with outputs in -3..3.
 * Entries 0-127 are non-negative and step up 0,1,2,3 at indices 1, 3 and
 * 40; entries 128-255 are negative and, reading the index as a signed
 * byte, form the negated mirror image of the positive half.
 */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/*
 * 256-entry lookup table with outputs in -4..4.
 * Entries 0-127 are non-negative and step up 0,1,2,3,4 at indices 1, 3, 7
 * and 21; entries 128-255 are negative and, reading the index as a signed
 * byte, form the negated mirror image of the positive half.
 */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/*
 * 256-entry lookup table with outputs in -5..5.
 * Entries 0-127 are non-negative and step up 0..5 at indices 1, 2, 5, 12
 * and 35; entries 128-255 are negative and, reading the index as a signed
 * byte, form the negated mirror image of the positive half.
 */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/*
 * 256-entry lookup table with outputs in -6..6.
 * Entries 0-127 are non-negative and step up 0..6 at indices 1, 2, 4, 8,
 * 17 and 50; entries 128-255 are negative and, reading the index as a
 * signed byte, form the negated mirror image of the positive half.
 */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/*
 * 32x32 table of 8-bit weights (1024 entries, one row of 32 per line).
 * The weights peak at 255 in the four central entries and fall off toward
 * the border; row k equals row 31-k.
 * NOTE(review): obmc32 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * 16x16 table of 8-bit weights (256 entries, one row of 16 per line).
 * The weights peak at 248 in the four central entries; row k equals
 * row 15-k.
 * NOTE(review): obmc16 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/*
 * 32x32 table of 8-bit weights (1024 entries, one row of 32 per line).
 * This variant peaks at 240 in the four central entries and keeps small
 * non-zero weights (up to 8) along much of the border; row k equals
 * row 31-k.
 * NOTE(review): obmc32 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/*
 * 16x16 table of 8-bit weights (256 entries, one row of 16 per line).
 * This variant peaks at 224 in the four central entries; row k equals
 * row 15-k.
 * NOTE(review): obmc16 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/*
 * 32x32 table of 8-bit weights (1024 entries, one row of 32 per line).
 * This variant peaks at 255 in the four central entries and has all-zero
 * first and last rows; row k equals row 31-k.
 * NOTE(review): obmc32 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * 16x16 table of 8-bit weights (256 entries, one row of 16 per line).
 * This variant peaks at 252 in the four central entries; row k equals
 * row 15-k.
 * NOTE(review): obmc16 is defined three times in this file with different
 * values; presumably only one variant is compiled, selected by preprocessor
 * conditionals -- confirm which one is active.
 */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/*
 * 8x8 table of 8-bit weights (64 entries, one row of 8 per line).
 * The weights peak at 196 in the four central entries; rows and columns
 * are mirror-symmetric.
 */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
/* 16-entry table of 8-bit weights; it is the last table listed in obmc_tab. */
358 static const uint8_t obmc4[16]={
/*
 * Index of the weight tables, ordered from largest to smallest:
 * [0] = obmc32, [1] = obmc16, [2] = obmc8, [3] = obmc4.
 */
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
/*
 * File-scope MAX_REF_FRAMES x MAX_REF_FRAMES integer table (zero until
 * written, as it has static storage).
 * NOTE(review): presumably holds motion-vector scale factors between pairs
 * of reference frames -- confirm where it is filled and read.
 */
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/*
 * Per-block node type. The visible members are the `level` byte and the
 * BLOCK_INTRA flag value (1), which is #defined inside the struct body;
 * TYPE_SPLIT and TYPE_NOCOLOR are disabled (commented-out) flag values.
 */
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
/*
 * Constant default BlockNode with all three colour components set to 128.
 * NOTE(review): presumably used where a neighbouring block is unavailable
 * -- confirm against the callers.
 */
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
/* Block size constants: MB_SIZE is 1 << LOG2_MB_SIZE, i.e. 16. */
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): presumably extra precision bits kept by the encoder -- confirm at the use sites. */
396 #define ENCODER_EXTRA_BITS 4
/* Element type of SubBand.x_coeff; by its name, presumably an (x position, coefficient) pair -- confirm. */
399 typedef struct x_and_coeff{
/*
 * Description of one wavelet subband: its quantiser log value, line
 * stride, coefficient list, a link to a parent subband and the adaptive
 * coder states (7 + 512 contexts of 32 bytes each).
 */
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
/*
 * Per-plane state: four subbands for each of up to MAX_DECOMPOSITIONS
 * decomposition levels, plus the current and previous sets of HTAPS_MAX/2
 * signed filter coefficients (hcoeff / last_hcoeff).
 */
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
/*
 * Codec-wide state. The visible members cover the owning AVCodecContext,
 * the picture buffers (input, current, up to MAX_REF_FRAMES previous
 * pictures and their half-pel planes), range-coder context bytes, the
 * current and previous decomposition settings, per-reference motion-vector
 * and score arrays, the forward/inverse DWT work buffers, the planes and a
 * motion-estimation cache. QBIAS_SHIFT and ME_CACHE_SIZE are #defined
 * inside the struct body.
 */
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int temporal_decomposition_count;
460 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
461 uint32_t *ref_scores[MAX_REF_FRAMES];
462 DWTELEM *spatial_dwt_buffer;
463 IDWTELEM *spatial_idwt_buffer;
467 int spatial_scalability;
477 #define QBIAS_SHIFT 3
481 int last_block_max_depth;
482 Plane plane[MAX_PLANES];
484 #define ME_CACHE_SIZE 1024
485 int me_cache[ME_CACHE_SIZE];
486 int me_cache_generation;
489 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/*
 * Return the buffer bound to line_num, calling slice_buffer_load_line()
 * only when no buffer is bound yet. Both arguments are evaluated more than
 * once, so they must be free of side effects.
 */
500 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
501 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not in this part of the file. */
503 static void iterative_me(SnowContext *s);
/*
 * Set up a slice_buffer: record its geometry and backing buffer, allocate
 * the zero-filled per-line pointer table and pre-allocate a stack of
 * reusable line buffers.
 *
 * buf                 - slice buffer to initialise
 * line_count          - number of entries in buf->line
 * max_allocated_lines - number of line buffers placed on buf->data_stack
 * line_width          - length of each line buffer, in IDWTELEM elements
 * base_buffer         - stored in buf->base_buffer
 *
 * NOTE(review): none of the av_malloc()/av_mallocz() results are checked
 * for NULL in the visible code.
 */
505 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
509 buf->base_buffer = base_buffer;
510 buf->line_count = line_count;
511 buf->line_width = line_width;
512 buf->data_count = max_allocated_lines;
513 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
514 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
516 for (i = 0; i < max_allocated_lines; i++)
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
/* The free stack starts full: the top index is the last pre-allocated buffer. */
521 buf->data_stack_top = max_allocated_lines - 1;
/*
 * Bind a line buffer to `line`: take the buffer on top of buf->data_stack,
 * decrement the stack top and store the buffer in buf->line[line].
 * The assert requires that at least one free buffer remains.
 * NOTE(review): the early `return buf->line[line]` is presumably the path
 * for a line that already has a buffer bound -- confirm its guard.
 */
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
529 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
534 return buf->line[line];
536 offset = buf->line_width * line;
537 buffer = buf->data_stack[buf->data_stack_top];
538 buf->data_stack_top--;
539 buf->line[line] = buffer;
541 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/*
 * Unbind the buffer of `line` and push it back onto buf->data_stack.
 * The asserts require `line` to be within [0, buf->line_count) and to have
 * a buffer bound; afterwards buf->line[line] is NULL.
 */
546 static void slice_buffer_release(slice_buffer * buf, int line)
551 assert(line >= 0 && line < buf->line_count);
552 assert(buf->line[line]);
554 offset = buf->line_width * line;
555 buffer = buf->line[line];
556 buf->data_stack_top++;
557 buf->data_stack[buf->data_stack_top] = buffer;
558 buf->line[line] = NULL;
560 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/*
 * Walk every line index in [0, buf->line_count) and hand buffers back to
 * the free stack through slice_buffer_release().
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer
 * bound, so the call is presumably made only for bound lines -- confirm.
 */
563 static void slice_buffer_flush(slice_buffer * buf)
566 for (i = 0; i < buf->line_count; i++)
570 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
571 slice_buffer_release(buf, i);
/*
 * Release everything owned by the slice buffer: flush first so the line
 * buffers are back on the stack, free each of the buf->data_count stack
 * entries, then free the stack array and the line pointer table.
 * av_freep() is given the address of each pointer.
 */
576 static void slice_buffer_destroy(slice_buffer * buf)
579 slice_buffer_flush(buf);
581 for (i = buf->data_count - 1; i >= 0; i--)
583 av_freep(&buf->data_stack[i]);
585 av_freep(&buf->data_stack);
586 av_freep(&buf->line);
590 // Avoid a name clash on SGI IRIX
/* Shift amount derived from FRAC_BITS: (7 - FRAC_BITS + 8). */
593 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* QROOT-entry byte table; it has no initialiser here, so it is presumably filled at run time -- confirm. */
594 static uint8_t qexp[QROOT];
/*
 * Map index v into the range [0, m]. The loop runs while v is outside that
 * range; the unsigned comparison also treats negative v as out of range.
 * NOTE(review): presumably reflects v at the borders 0 and m; it is called
 * with m = height-1 to pick rows in spatial_decompose53i().
 */
596 static inline int mirror(int v, int m){
597 while((unsigned)v > (unsigned)m){
/*
 * Write integer v to the range coder as a sequence of adaptive binary
 * decisions, using the 32 context bytes at `state`:
 *   state[0]       flag bit (0 is written before a coded value, 1 otherwise;
 *                  presumably the "v == 0" flag -- confirm)
 *   state[1..10]   unary-coded exponent e = av_log2(|v|)
 *   state[11..21]  sign bit (v < 0)
 *   state[22..31]  magnitude bits, most significant first
 * Context indices are clamped (FFMIN) so large exponents share a context.
 * NOTE(review): several equivalent-looking code paths are present below;
 * presumably they are alternatives selected at compile time -- confirm.
 */
604 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
608 const int a= FFABS(v);
609 const int e= av_log2(a);
611 const int el= FFMIN(e, 10);
612 put_rac(c, state+0, 0);
615 put_rac(c, state+1+i, 1); //1..10
618 put_rac(c, state+1+9, 1); //1..10
620 put_rac(c, state+1+FFMIN(i,9), 0);
622 for(i=e-1; i>=el; i--){
623 put_rac(c, state+22+9, (a>>i)&1); //22..31
626 put_rac(c, state+22+i, (a>>i)&1); //22..31
630 put_rac(c, state+11 + el, v < 0); //11..21
633 put_rac(c, state+0, 0);
636 put_rac(c, state+1+i, 1); //1..10
638 put_rac(c, state+1+i, 0);
640 for(i=e-1; i>=0; i--){
641 put_rac(c, state+22+i, (a>>i)&1); //22..31
645 put_rac(c, state+11 + e, v < 0); //11..21
648 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
650 put_rac(c, state+1+FFMIN(i,9), 0);
652 for(i=e-1; i>=0; i--){
653 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
657 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
661 put_rac(c, state+0, 1);
/*
 * Read an integer written by put_symbol(), using the same context layout:
 * a flag bit at state[0], a unary exponent from state[1 + min(e,9)],
 * magnitude bits from state[22 + min(i,9)] accumulated most significant
 * first (a = 2*a + bit), and, when is_signed is set, a sign bit from
 * state[11 + min(e,10)].
 */
665 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
666 if(get_rac(c, state+0))
671 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
676 for(i=e-1; i>=0; i--){
677 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
680 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/*
 * Write v with a coding parameterised by log2: continuation/stop bits go
 * through context state[4 + log2] (1 to continue, 0 to stop) and the
 * remaining log2 low bits of v are written most significant first through
 * contexts state[31 - i]. r starts at 1 << log2 (or 1 when log2 < 0).
 * NOTE(review): how r and log2 evolve inside the continuation loop should
 * be confirmed against the complete body.
 */
687 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
689 int r= log2>=0 ? 1<<log2 : 1;
695 put_rac(c, state+4+log2, 1);
700 put_rac(c, state+4+log2, 0);
702 for(i=log2-1; i>=0; i--){
703 put_rac(c, state+31-i, (v>>i)&1);
/*
 * Read a value written by put_symbol2(): continuation bits are read from
 * context state[4 + log2] until a 0 is seen, then log2 low bits are read
 * most significant first from contexts state[31 - i] and added into v.
 * r starts at 1 << log2 (or 1 when log2 < 0).
 */
707 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
709 int r= log2>=0 ? 1<<log2 : 1;
714 while(get_rac(c, state+4+log2)){
720 for(i=log2-1; i>=0; i--){
721 v+= get_rac(c, state+31-i)<<i;
/*
 * One lifting step over DWTELEM samples:
 *   dst[i] = src[i] +/- ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * with the correction subtracted when `inverse` is non-zero (LIFT macro).
 * At a mirrored border the single available reference sample is counted
 * twice (mul * 2 * ref[...]).
 *
 * dst_step/src_step/ref_step - element strides of the three arrays
 * width    - total number of samples in the line being transformed
 * highpass - non-zero when updating the high-pass half; it determines
 *            which borders are mirrored and the index w of the last
 *            element handled, (width>>1) - 1 + (highpass & width)
 */
727 static av_always_inline void
728 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
729 int dst_step, int src_step, int ref_step,
730 int width, int mul, int add, int shift,
731 int highpass, int inverse){
732 const int mirror_left= !highpass;
733 const int mirror_right= (width&1) ^ highpass;
734 const int w= (width>>1) - 1 + (highpass & width);
737 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
739 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
746 LIFT(src[i*src_step],
747 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
753 LIFT(src[w*src_step],
754 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Same lifting step as lift(), operating on IDWTELEM samples:
 *   dst[i] = src[i] +/- ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * with the correction subtracted when `inverse` is non-zero. At a mirrored
 * border the single available reference sample is counted twice.
 * The parameters have the same meaning as in lift().
 */
759 static av_always_inline void
760 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
761 int dst_step, int src_step, int ref_step,
762 int width, int mul, int add, int shift,
763 int highpass, int inverse){
764 const int mirror_left= !highpass;
765 const int mirror_right= (width&1) ^ highpass;
766 const int w= (width>>1) - 1 + (highpass & width);
769 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
771 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
778 LIFT(src[i*src_step],
779 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
785 LIFT(src[w*src_step],
786 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Variant lifting step over DWTELEM samples in which the correction also
 * depends on the sample being updated. The LIFTS macro has two
 * alternatives:
 *   src + ((ref + 4*src) >> shift)
 *   -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23))
 * where `ref` is mul * (sum of the two neighbouring reference samples) +
 * add, or mul * 2 * ref[...] at a mirrored border. LIFTS reads `shift` and
 * `add` from the enclosing function rather than taking them as arguments.
 * NOTE(review): the selector between the two alternatives is presumably
 * `inv` -- confirm. The border and width handling mirrors lift().
 */
792 static av_always_inline void
793 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
794 int dst_step, int src_step, int ref_step,
795 int width, int mul, int add, int shift,
796 int highpass, int inverse){
797 const int mirror_left= !highpass;
798 const int mirror_right= (width&1) ^ highpass;
799 const int w= (width>>1) - 1 + (highpass & width);
803 #define LIFTS(src, ref, inv) \
805 (src) + (((ref) + 4*(src))>>shift): \
806 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
808 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
815 LIFTS(src[i*src_step],
816 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
822 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/*
 * Same step as liftS(), operating on IDWTELEM samples. The LIFTS macro has
 * two alternatives:
 *   src + ((ref + 4*src) >> shift)
 *   -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23))
 * where `ref` is mul * (sum of the two neighbouring reference samples) +
 * add, or mul * 2 * ref[...] at a mirrored border. LIFTS reads `shift` and
 * `add` from the enclosing function rather than taking them as arguments.
 * NOTE(review): the selector between the two alternatives is presumably
 * `inv` -- confirm.
 */
825 static av_always_inline void
826 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
827 int dst_step, int src_step, int ref_step,
828 int width, int mul, int add, int shift,
829 int highpass, int inverse){
830 const int mirror_left= !highpass;
831 const int mirror_right= (width&1) ^ highpass;
832 const int w= (width>>1) - 1 + (highpass & width);
836 #define LIFTS(src, ref, inv) \
838 (src) + (((ref) + 4*(src))>>shift): \
839 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
841 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
848 LIFTS(src[i*src_step],
849 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
855 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/*
 * Horizontal forward 5/3 lifting of one row.
 *
 * b holds `width` samples and is rewritten in place: after the two lift()
 * calls the second band starts at b+w2 and the first band at b.
 * width2 is the number of odd-indexed samples, w2 = ceil(width/2) is the
 * offset at which they are stored in the scratch array temp.
 */
860 static void horizontal_decompose53i(DWTELEM *b, int width){
862 const int width2= width>>1;
864 const int w2= (width+1)>>1;
/* De-interleave: odd-indexed samples are copied to temp starting at w2. */
866 for(x=0; x<width2; x++){
868 temp[x+w2]= b[2*x + 1];
/* NOTE(review): the A1..A4 statements look like a hand-unrolled form of
 * the same lifting steps; the lines that enable or disable this path are not
 * shown here - confirm before relying on it. */
882 for(x=1; x+1<width2; x+=2){
886 A2 += (A1 + A3 + 2)>>2;
890 A1= temp[x+1+width2];
893 A4 += (A1 + A3 + 2)>>2;
899 A2 += (A1 + A3 + 2)>>2;
/* Step 1 (mul=-1, add=0, shift=1, highpass=1): b+w2 from temp+w2 with temp as reference. */
904 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
/* Step 2 (mul=1, add=2, shift=2, highpass=0): b from temp with the new b+w2 as reference. */
905 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/*
 * Vertical forward 5/3 step on three rows: for every column i in
 * [0, width), subtracts (b0[i] + b2[i]) >> 1 from b1[i]. Only b1 is written.
 */
909 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
912 for(i=0; i<width; i++){
913 b1[i] -= (b0[i] + b2[i])>>1;
/*
 * Vertical forward 5/3 step on three rows: for every column i in
 * [0, width), adds (b0[i] + b2[i] + 2) >> 2 to b1[i]. Only b1 is written.
 */
917 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
920 for(i=0; i<width; i++){
921 b1[i] += (b0[i] + b2[i] + 2)>>2;
/*
 * One level of the forward 5/3 transform over a width x height plane whose
 * rows are `stride` elements apart.
 *
 * Rows are processed two at a time with a sliding set of row pointers
 * (b0..b3). Row indices are passed through mirror(idx, height-1) before
 * being turned into pointers.
 * The guards of the form `idx<(unsigned)height` convert idx to unsigned, so
 * they are false both for idx >= height and for negative idx.
 * NOTE(review): the rotation of b0/b1 at the end of each iteration is not
 * shown here - confirm.
 */
925 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
/* The loop starts at y=-2, so the first row pointers refer to rows -3 and -2 (mirrored). */
927 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
928 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
930 for(y=-2; y<height; y+=2){
931 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
932 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* Horizontal pass on the two newly reached rows, if they exist. */
935 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
936 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
937 STOP_TIMER("horizontal_decompose53i")}
/* Vertical pass: H0 on the row at y+1 first, then L0 on the row at y. */
940 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
941 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
942 STOP_TIMER("vertical_decompose53i*")}
/*
 * Horizontal forward 9/7 lifting of one row b of `width` samples.
 *
 * Four lifting steps are chained, using the coefficient triples
 * (W_AM,W_AO,W_AS) .. (W_DM,W_DO,W_DS). The first two read b with stride 2
 * (interleaved samples) and write into the scratch array temp; the last two
 * write the results back to b, one band at b+w2 and the other at b.
 * w2 = ceil(width/2) is the offset of the second band.
 */
949 static void horizontal_decompose97i(DWTELEM *b, int width){
951 const int w2= (width+1)>>1;
/* A: odd samples (b+1, stride 2) against even samples (b, stride 2), inverse=1. */
953 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
/* B: even samples against the result of step A, via liftS. */
954 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
/* C: writes the band stored at b+w2. */
955 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
/* D: writes the band stored at b. */
956 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/*
 * Vertical forward 9/7 step "A": for every column i in [0, width),
 * subtracts (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS from b1[i].
 */
960 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
963 for(i=0; i<width; i++){
964 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Vertical forward 9/7 step "C": for every column i in [0, width),
 * adds (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS to b1[i].
 */
968 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
971 for(i=0; i<width; i++){
972 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/*
 * Vertical forward 9/7 step "B" applied to b1 using rows b0 and b2.
 *
 * NOTE(review): two different update statements for b1[i] are visible; the
 * lines that select between them (presumably a preprocessor conditional) are
 * not shown here - confirm which one is compiled.
 */
976 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
979 for(i=0; i<width; i++){
/* Plain shift form using W_BM/W_BO/W_BS. */
981 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* Division form: the (5<<27) bias and the final -(1<<23) cancel after dividing by 5*16. */
983 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/*
 * Vertical forward 9/7 step "D": for every column i in [0, width),
 * adds (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS to b1[i].
 */
988 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
991 for(i=0; i<width; i++){
992 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * One level of the forward 9/7 transform over a width x height plane whose
 * rows are `stride` elements apart.
 *
 * Uses a sliding window of six row pointers (b0..b5); two new rows are
 * reached per iteration. Row indices go through mirror(idx, height-1).
 * The `idx<(unsigned)height` guards are false for negative idx as well as
 * for idx >= height, because idx is converted to unsigned.
 * NOTE(review): the rotation of b0..b3 at the end of each iteration is not
 * shown here - confirm.
 */
996 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* The loop starts at y=-4, so the window initially covers rows -5..-2 (mirrored). */
998 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
999 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1000 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1001 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1003 for(y=-4; y<height; y+=2){
1004 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1005 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* Horizontal pass on the two newly reached rows, if they exist. */
1008 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1009 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1011 STOP_TIMER("horizontal_decompose97i")
/* Vertical steps, newest row first: H0 at y+3, L0 at y+2, H1 at y+1, L1 at y. */
1015 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1016 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1017 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1018 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1021 STOP_TIMER("vertical_decompose97i")
/*
 * Forward wavelet transform of `buffer` with `decomposition_count` levels.
 *
 * Level k works on a (width>>k) x (height>>k) area with a row distance of
 * stride<<k. `type` selects the filter: DWT_97 or DWT_53.
 */
1031 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1034 for(level=0; level<decomposition_count; level++){
1036 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1037 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/*
 * Horizontal inverse 5/3 lifting of one row b of `width` samples, in place.
 *
 * On entry one band starts at b and the other at b+w2 (w2 = ceil(width/2)).
 * The two inv_lift() calls write both halves into the scratch array temp,
 * which is then interleaved back into b.
 * temp is a variable-length array of `width` elements on the stack.
 */
1042 static void horizontal_compose53i(IDWTELEM *b, int width){
1043 IDWTELEM temp[width];
1044 const int width2= width>>1;
1045 const int w2= (width+1)>>1;
/* NOTE(review): the A1..A4 statements look like a hand-unrolled variant;
 * the lines that enable or disable this path are not shown here - confirm. */
1057 for(x=1; x+1<width2; x+=2){
1061 A2 += (A1 + A3 + 2)>>2;
1065 A1= temp[x+1+width2];
1068 A4 += (A1 + A3 + 2)>>2;
1074 A2 += (A1 + A3 + 2)>>2;
/* Undo the (mul=1, add=2, shift=2) step first, then the (mul=-1, add=0, shift=1) step. */
1078 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1079 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* Re-interleave: values stored from temp+w2 go to the odd positions of b. */
1081 for(x=0; x<width2; x++){
1083 b[2*x + 1]= temp[x+w2];
/*
 * Vertical inverse 5/3 step: for every column i in [0, width), adds
 * (b0[i] + b2[i]) >> 1 to b1[i]. Only b1 is written.
 */
1089 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1092 for(i=0; i<width; i++){
1093 b1[i] += (b0[i] + b2[i])>>1;
/*
 * Vertical inverse 5/3 step: for every column i in [0, width), subtracts
 * (b0[i] + b2[i] + 2) >> 2 from b1[i]. Only b1 is written.
 */
1097 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1100 for(i=0; i<width; i++){
1101 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/*
 * Prepares the 5/3 compose state `cs` for slice-buffer based decoding:
 * b0 and b1 are set to the slice-buffer lines for rows -2 and -1 after
 * mirroring into the picture and scaling by stride_line.
 * NOTE(review): the initial value of cs->y is presumably set on a line that
 * is not shown here - confirm.
 */
1105 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1106 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1107 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/*
 * Prepares the 5/3 compose state `cs` for a flat buffer: b0 and b1 point at
 * the rows with mirrored indices -2 and -1, `stride` elements per row.
 * NOTE(review): the initial value of cs->y is presumably set on a line that
 * is not shown here - confirm.
 */
1111 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1112 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1113 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/*
 * Advances the slice-buffer based inverse 5/3 transform by one step.
 *
 * b0/b1 come from the saved state, b2/b3 are fetched from the slice buffer
 * for rows y+1 and y+2 (mirrored). Vertical steps run before the horizontal
 * ones. The `idx<(unsigned)height` guards are false for negative idx too.
 * NOTE(review): `y` is presumably read from cs->y, and the state is
 * presumably updated at the end; those lines are not shown here - confirm.
 */
1117 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1120 IDWTELEM *b0= cs->b0;
1121 IDWTELEM *b1= cs->b1;
1122 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1123 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* Vertical: L0 on the row at y+1, then H0 on the row at y. */
1126 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1127 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1128 STOP_TIMER("vertical_compose53i*")}
/* Horizontal: rows y-1 and y have finished their vertical steps. */
1131 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1132 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1133 STOP_TIMER("horizontal_compose53i")}
/*
 * Advances the flat-buffer inverse 5/3 transform by one step.
 *
 * b0/b1 come from the saved state, b2/b3 point at rows y+1 and y+2
 * (mirrored) of `buffer`. Vertical steps run before the horizontal ones.
 * The `idx<(unsigned)height` guards are false for negative idx too.
 * NOTE(review): `y` is presumably read from cs->y, and the state is
 * presumably updated at the end; those lines are not shown here - confirm.
 */
1140 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1142 IDWTELEM *b0= cs->b0;
1143 IDWTELEM *b1= cs->b1;
1144 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1145 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* Vertical: L0 on the row at y+1, then H0 on the row at y. */
1148 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1149 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1150 STOP_TIMER("vertical_compose53i*")}
/* Horizontal: rows y-1 and y. */
1153 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1154 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1155 STOP_TIMER("horizontal_compose53i")}
/*
 * Full inverse 5/3 transform of one level on a flat buffer: initialises a
 * local compose state and keeps stepping it while cs.y <= height.
 */
1162 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1164 spatial_compose53i_init(&cs, buffer, height, stride);
1165 while(cs.y <= height)
1166 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/*
 * Horizontal inverse 9/7 lifting of one row b of `width` samples, in place.
 *
 * The four steps use the coefficient triples D, C, B, A in that order.
 * The first two write into the scratch array temp (a variable-length array
 * of `width` elements); the last two write back into b with dst_step 2,
 * i.e. to the even positions (b) and the odd positions (b+1).
 */
1170 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1171 IDWTELEM temp[width];
1172 const int w2= (width+1)>>1;
1174 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1175 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1176 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
/* The last step passes inverse=0 and reads its reference from b with stride 2. */
1177 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/*
 * Vertical inverse 9/7 step "A": for every column i in [0, width), adds
 * (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS to b1[i].
 */
1180 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1183 for(i=0; i<width; i++){
1184 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Vertical inverse 9/7 step "C": for every column i in [0, width),
 * subtracts (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS from b1[i].
 */
1188 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1191 for(i=0; i<width; i++){
1192 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/*
 * Vertical inverse 9/7 step "B" applied to b1 using rows b0 and b2.
 *
 * NOTE(review): two different update statements for b1[i] are visible; the
 * lines that select between them (presumably a preprocessor conditional) are
 * not shown here - confirm which one is compiled.
 */
1196 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1199 for(i=0; i<width; i++){
/* Form without the sample itself inside the shifted term. */
1201 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* Form that also feeds 4*b1[i] into the shifted term. */
1203 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/*
 * Vertical inverse 9/7 step "D": for every column i in [0, width),
 * subtracts (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS from b1[i].
 */
1208 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1211 for(i=0; i<width; i++){
1212 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Fused vertical inverse 9/7 step over six consecutive rows b0..b5.
 *
 * For each column the four lifting updates are applied in the order
 * D (on b4), C (on b3), B (on b2), A (on b1); each one reads the rows
 * directly above and below the row it writes, so later updates see the
 * results of earlier ones. b0 and b5 are only read.
 * NOTE(review): two alternative statements for the B update are visible;
 * the lines selecting between them are not shown here - confirm.
 */
1216 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1219 for(i=0; i<width; i++){
1220 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1221 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1223 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1225 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1227 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Prepares the 9/7 compose state `cs` for slice-buffer based decoding:
 * b0..b3 are set to the slice-buffer lines for rows -4..-1 after mirroring
 * into the picture and scaling by stride_line.
 * NOTE(review): the initial value of cs->y is presumably set on a line that
 * is not shown here - confirm.
 */
1231 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1232 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1233 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1234 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1235 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/*
 * Prepares the 9/7 compose state `cs` for a flat buffer: b0..b3 point at the
 * rows with mirrored indices -4..-1, `stride` elements per row.
 * NOTE(review): the initial value of cs->y is presumably set on a line that
 * is not shown here - confirm.
 */
1239 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1240 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1241 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1242 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1243 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/*
 * Advances the slice-buffer based inverse 9/7 transform by one step.
 *
 * b0..b3 come from the saved state; b4/b5 are fetched from the slice buffer
 * for rows y+3 and y+4 (mirrored). The vertical and horizontal kernels are
 * called through the function pointers in `dsp` where visible below.
 * The `idx<(unsigned)height` guards are false for negative idx too.
 * NOTE(review): `y` is presumably read from cs->y, the four single-step
 * calls are presumably the else-branch of the fast path, and the state is
 * presumably updated at the end; those lines are not shown here - confirm.
 */
1247 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1250 IDWTELEM *b0= cs->b0;
1251 IDWTELEM *b1= cs->b1;
1252 IDWTELEM *b2= cs->b2;
1253 IDWTELEM *b3= cs->b3;
1254 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1255 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* Fast path: all six rows are inside the picture, so the fused kernel can be used. */
1258 if(y>0 && y+4<height){
1259 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* Border handling: each step is applied only if the row it writes exists. */
1261 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1262 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1263 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1264 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1267 STOP_TIMER("vertical_compose97i")}}
/* Horizontal pass on rows y-1 and y. */
1270 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1271 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/* The condition below only gates the timing macro. */
1272 if(width>400 && y+0<(unsigned)height){
1273 STOP_TIMER("horizontal_compose97i")}}
/*
 * Advances the flat-buffer inverse 9/7 transform by one step.
 *
 * b0..b3 come from the saved state; b4/b5 point at rows y+3 and y+4
 * (mirrored) of `buffer`. The four vertical steps are applied one by one,
 * each only if the row it writes exists, followed by the horizontal pass on
 * rows y-1 and y.
 * NOTE(review): `y` is presumably read from cs->y and the state is
 * presumably updated at the end; those lines are not shown here - confirm.
 */
1282 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1284 IDWTELEM *b0= cs->b0;
1285 IDWTELEM *b1= cs->b1;
1286 IDWTELEM *b2= cs->b2;
1287 IDWTELEM *b3= cs->b3;
1288 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1289 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* Vertical steps in the order D (L1), C (H1), B (L0), A (H0). */
1292 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1293 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1294 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1295 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1297 STOP_TIMER("vertical_compose97i")}}
/* Horizontal pass. */
1300 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1301 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/* The condition below only gates the timing macro. */
1302 if(width>400 && b0 <= b2){
1303 STOP_TIMER("horizontal_compose97i")}}
/*
 * Full inverse 9/7 transform of one level on a flat buffer: initialises a
 * local compose state and keeps stepping it while cs.y <= height.
 */
1312 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1314 spatial_compose97i_init(&cs, buffer, height, stride);
1315 while(cs.y <= height)
1316 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/*
 * Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffer based inverse transform. Levels are visited from
 * decomposition_count-1 down to 0; level k uses height>>k and
 * stride_line<<k. `type` selects DWT_97 or DWT_53.
 * `width` is not used on any line visible here.
 */
1319 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1321 for(level=decomposition_count-1; level>=0; level--){
1323 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1324 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/*
 * Initialises one compose state per decomposition level (cs[level]) for the
 * flat-buffer inverse transform. Levels are visited from
 * decomposition_count-1 down to 0; level k uses height>>k and stride<<k.
 * `type` selects DWT_97 or DWT_53.
 * `width` is not used on any line visible here.
 */
1329 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1331 for(level=decomposition_count-1; level>=0; level--){
1333 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1334 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/*
 * Runs the flat-buffer inverse transform far enough that output row `y` can
 * be produced.
 *
 * Levels are processed from decomposition_count-1 down to 0. Each level is
 * stepped while its cs[level].y has not passed
 * min((y>>level) + support, height>>level).
 */
1339 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* Look-ahead in rows: 3 when type==1, 5 otherwise. */
1340 const int support = type==1 ? 3 : 5;
1344 for(level=decomposition_count-1; level>=0; level--){
1345 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1347 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1349 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/*
 * Slice-buffer counterpart of the per-slice inverse transform driver.
 *
 * Levels are processed from decomposition_count-1 down to 0. Each level is
 * stepped while its cs[level].y has not passed
 * min((y>>level) + support, height>>level). The 9/7 path receives `dsp`.
 */
1356 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* Look-ahead in rows: 3 when type==1, 5 otherwise. */
1357 const int support = type==1 ? 3 : 5;
1361 for(level=decomposition_count-1; level>=0; level--){
1362 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1364 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1366 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/*
 * Complete inverse wavelet transform of a flat buffer: sets up one compose
 * state per level and then drives the slice function for y = 0, 4, 8, ...
 * below `height`.
 */
1373 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1374 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1376 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1377 for(y=0; y<height; y+=4)
1378 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/*
 * Entropy-codes one subband with zero-run coding in "zero context" areas.
 *
 * Two passes over the w x h coefficients in `src` are visible. Both gather
 * the already-coded neighbours (l, lt, t, rt) and the co-located parent
 * coefficient p.
 *   pass 1: where every neighbour and the parent are zero, run lengths are
 *           collected into runs[];
 *   pass 2: coefficients are written to the range coder s->c.
 * Logs "encoded frame too large" when fewer than w*40 bytes remain in the
 * output buffer.
 * NOTE(review): loop headers, the boundary tests around the neighbour loads
 * and the return statements are not shown here. The two `ll=` statements
 * refer to a variable whose declaration is commented out, so they are
 * presumably inside a disabled region - confirm.
 */
1381 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1382 const int w= b->width;
1383 const int h= b->height;
/* ---- pass 1: neighbour gathering for run-length collection ---- */
1395 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1396 v= src[x + y*stride];
1399 t= src[x + (y-1)*stride];
1401 lt= src[x - 1 + (y-1)*stride];
1404 rt= src[x + 1 + (y-1)*stride];
1408 l= src[x - 1 + y*stride];
1410 if(orientation==1) ll= src[y + (x-2)*stride];
1411 else ll= src[x - 2 + y*stride];
/* Parent coefficient, only when (px, py) lies inside the parent band. */
1417 if(px<b->parent->width && py<b->parent->height)
1418 p= parent[px + py*2*stride];
/* All context values zero: this position takes part in run coding. */
1420 if(!(/*ll|*/l|lt|t|rt|p)){
1422 runs[run_index++]= run;
/* max_index remembers how many runs were collected before the trailing one is stored. */
1430 max_index= run_index;
1431 runs[run_index++]= run;
1433 run= runs[run_index++];
/* The number of runs is coded first, then the first run if there is one. */
1435 put_symbol2(&s->c, b->state[30], max_index, 0);
1436 if(run_index <= max_index)
1437 put_symbol2(&s->c, b->state[1], run, 3);
/* ---- pass 2: coefficient coding ---- */
1440 if(s->c.bytestream_end - s->c.bytestream < w*40){
1441 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1446 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1447 v= src[x + y*stride];
1450 t= src[x + (y-1)*stride];
1452 lt= src[x - 1 + (y-1)*stride];
1455 rt= src[x + 1 + (y-1)*stride];
1459 l= src[x - 1 + y*stride];
1461 if(orientation==1) ll= src[y + (x-2)*stride];
1462 else ll= src[x - 2 + y*stride];
1468 if(px<b->parent->width && py<b->parent->height)
1469 p= parent[px + py*2*stride];
/* Non-zero context: a zero/non-zero flag is coded with a context derived
 * from the weighted neighbour magnitudes. */
1471 if(/*ll|*/l|lt|t|rt|p){
1472 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1474 put_rac(&s->c, &b->state[0][context], !!v);
/* Zero context: consume the next collected run and code it if any remain. */
1477 run= runs[run_index++];
1479 if(run_index <= max_index)
1480 put_symbol2(&s->c, b->state[1], run, 3);
/* Magnitude minus one, then the sign with a context built from the signs of l and t. */
1488 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1489 int l2= 2*FFABS(l) + (l<0);
1490 int t2= 2*FFABS(t) + (t<0);
1492 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1493 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/*
 * Encodes one subband. Currently a plain forwarder to the c0run coder; the
 * other coder names survive only as commented-out calls.
 * Returns whatever encode_subband_c0run() returns.
 */
1501 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1502 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1503 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1504 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1505 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/*
 * Decodes the coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff (entries carry an x position and a coded value).
 *
 * Several cursors into x_coeff lists are kept: xc (write position), prev_xc
 * and prev2_xc (earlier positions in the same list) and parent_xc /
 * prev_parent_xc (positions in the parent band's list, or NULL without a
 * parent). An entry with x == w+1 is written as end marker.
 * NOTE(review): the row/column loops and most of the cursor bookkeeping are
 * not shown here; in particular it is not visible where prev_xc becomes
 * non-NULL before it is dereferenced - confirm.
 */
1508 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1509 const int w= b->width;
1510 const int h= b->height;
1515 x_and_coeff *xc= b->x_coeff;
1516 x_and_coeff *prev_xc= NULL;
1517 x_and_coeff *prev2_xc= xc;
1518 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1519 x_and_coeff *prev_parent_xc= parent_xc;
/* Number of runs first, then the first run length if there is one. */
1521 runs= get_symbol2(&s->c, b->state[30], 0);
1522 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527 int lt=0, t=0, rt=0;
1529 if(y && prev_xc->x == 0){
1541 if(prev_xc->x == x + 1)
/* The parent list is indexed at half resolution (x>>1). */
1547 if(x>>1 > parent_xc->x){
1550 if(x>>1 == parent_xc->x){
1551 p= parent_xc->coeff;
/* Non-zero context. Neighbour values here are in the stored form
 * (magnitude in the upper bits, bit 0 separate), hence the >>1 and &~1. */
1554 if(/*ll|*/l|lt|t|rt|p){
1555 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1557 v=get_rac(&s->c, &b->state[0][context]);
/* Stored value: 2*magnitude, plus the bit returned by get_rac(). */
1559 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1560 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Zero context: fetch the next run, then decode a value with context 0. */
1567 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1569 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1570 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* Limit how far the run may be skipped in one go: up to the next entry of
 * the previous row, the row end, and the next parent entry. */
1579 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1580 else max_run= FFMIN(run, w-x-1);
1582 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1588 (xc++)->x= w+1; //end marker
1594 while(parent_xc->x != parent->width+1)
1597 prev_parent_xc= parent_xc;
1599 parent_xc= prev_parent_xc;
1604 (xc++)->x= w+1; //end marker
/*
 * Dequantises rows [start_y, h) of subband b from its sparse coefficient
 * list into the slice buffer.
 *
 * Each output line is cleared first; then list entries are consumed and
 * written at their x position. save_state[0] carries the list index from
 * one call to the next.
 * NOTE(review): the body of the early test on b->ibuf / LOSSLESS_QLOG and
 * the inner loop header are not shown here - confirm.
 */
1608 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1609 const int w= b->width;
/* Quantiser scale and rounding offset derived from the clipped qlog. */
1611 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1612 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1613 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1618 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1623 /* If we are on the second or later slice, restore our index. */
1625 new_index = save_state[0];
1628 for(y=start_y; y<h; y++){
1631 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1632 memset(line, 0, b->width*sizeof(IDWTELEM));
1633 v = b->x_coeff[new_index].coeff;
1634 x = b->x_coeff[new_index++].x;
/* v>>1 is the magnitude, bit 0 the sign; (t^u)-u negates t when u is -1. */
1637 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1638 register int u= -(v&1);
1639 line[x] = (t^u) - u;
1641 v = b->x_coeff[new_index].coeff;
1642 x = b->x_coeff[new_index++].x;
1645 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1646 STOP_TIMER("decode_subband")
1649 /* Save our variables for the next slice. */
1650 save_state[0] = new_index;
/*
 * Resets all adaptive coder states to MID_STATE: the per-band state arrays
 * of the 3 planes (all levels; orientation 0 only exists at level 0, other
 * levels start at orientation 1), plus the header and block state arrays.
 */
1655 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1656 int plane_index, level, orientation;
1658 for(plane_index=0; plane_index<3; plane_index++){
1659 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1660 for(orientation=level ? 1:0; orientation<4; orientation++){
1661 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1665 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1666 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/*
 * Allocates the zero-initialised block array s->block.
 *
 * w and h are the picture dimensions in units of 1<<LOG2_MB_SIZE pixels,
 * rounded up (negate, shift, negate; relies on an arithmetic right shift of
 * negative values). Every such unit gets 4^block_max_depth BlockNode
 * entries (the size is shifted left by 2*block_max_depth).
 * NOTE(review): no visible line checks the result of av_mallocz(); the
 * return statement is not shown here - confirm how failure is reported.
 */
1669 static int alloc_blocks(SnowContext *s){
1670 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1671 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1676 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/*
 * Copies range coder state from s to d while keeping d's own output buffer:
 * d's bytestream pointers are saved up front and written back at the end.
 * NOTE(review): the copy itself (presumably a struct assignment between the
 * save and the restore) is not shown here - confirm.
 */
1680 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1681 uint8_t *bytestream= d->bytestream;
1682 uint8_t *bytestream_start= d->bytestream_start;
1684 d->bytestream= bytestream;
1685 d->bytestream_start= bytestream_start;
/*
 * Walks a w x w square of 8-bit pixels starting at pix, with line_size
 * bytes between the starts of consecutive rows (after w pixels of a row the
 * pointer is advanced by line_size - w).
 * NOTE(review): the accumulation and the return statement are not shown
 * here; going by the name it presumably returns the sum of the pixels -
 * confirm.
 */
1688 //near copy & paste from dsputil, FIXME
1689 static int pix_sum(uint8_t * pix, int line_size, int w)
1694 for (i = 0; i < w; i++) {
1695 for (j = 0; j < w; j++) {
1699 pix += line_size - w;
/*
 * Walks a w x w square of 8-bit pixels starting at pix, with line_size
 * bytes between the starts of consecutive rows. sq points 256 entries into
 * ff_squareTbl.
 * NOTE(review): the accumulation and the return statement are not shown
 * here; presumably the squares of the pixels are summed via sq[] - confirm.
 */
1704 //near copy & paste from dsputil, FIXME
1705 static int pix_norm1(uint8_t * pix, int line_size, int w)
1708 uint32_t *sq = ff_squareTbl + 256;
1711 for (i = 0; i < w; i++) {
1712 for (j = 0; j < w; j ++) {
1716 pix += line_size - w;
/*
 * Writes the same BlockNode value into every finest-level cell covered by
 * the block at (x, y) of tree level `level`.
 *
 * The block array is w cells wide at the finest level; a block at `level`
 * covers block_w x block_w cells (block_w = 1 << (block_max_depth - level)),
 * starting at `index`.
 * NOTE(review): the construction of `block` from l, cb, cr, mx, my, ref and
 * type is not shown here - confirm.
 */
1721 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1722 const int w= s->b_width << s->block_max_depth;
1723 const int rem_depth= s->block_max_depth - level;
1724 const int index= (x + y*w) << rem_depth;
1725 const int block_w= 1<<rem_depth;
1738 for(j=0; j<block_w; j++){
1739 for(i=0; i<block_w; i++){
1740 s->block[index + i + j*w]= block;
/*
 * Points the motion estimation context c at the source and reference planes
 * for a block at pixel position (x, y).
 *
 * c->src[0][i] receives src[i] unchanged; c->ref[0][i] receives ref[i]
 * advanced by a per-plane offset. The two chroma offsets are
 * (y*c->uvstride + x) >> 1.
 * NOTE(review): the first (luma) offset is not shown here, and ref2 and
 * ref_index are not used on any visible line - confirm.
 */
1745 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1746 const int offset[3]= {
1748 ((y*c->uvstride + x)>>1),
1749 ((y*c->uvstride + x)>>1),
1753 c->src[0][i]= src [i];
1754 c->ref[0][i]= ref [i] + offset[i];
/*
 * Predicts a motion vector as the component-wise median of the left, top
 * and top-right neighbours and stores it in *mx, *my.
 *
 * With a single reference frame the neighbour vectors are used as they are.
 * Otherwise each neighbour vector is first rescaled for the target
 * reference `ref` with scale_mv_ref[ref][neighbour->ref] (fixed point with
 * 8 fractional bits, rounded by adding 128 before the shift).
 */
1759 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1760 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1761 if(s->ref_frames == 1){
1762 *mx = mid_pred(left->mx, top->mx, tr->mx);
1763 *my = mid_pred(left->my, top->my, tr->my);
1765 const int *scale = scale_mv_ref[ref];
1766 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1767 (top ->mx * scale[top ->ref] + 128) >>8,
1768 (tr ->mx * scale[tr ->ref] + 128) >>8);
1769 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1770 (top ->my * scale[top ->ref] + 128) >>8,
1771 (tr ->my * scale[tr ->ref] + 128) >>8);
/* Named entries of the motion vector predictor array P (each entry is an
 * x/y pair), followed by a flag constant.
 * NOTE(review): the definitions of the lower-numbered entries are not shown
 * here - confirm. */
1778 #define P_TOPRIGHT P[3]
1779 #define P_MEDIAN P[4]
1781 #define FLAG_QPEL 1 //must be 1
/*
 * Chooses and encodes the coding mode of the block at (x, y) on quad-tree
 * level `level`.
 *
 * Three candidates are evaluated:
 *   - inter: motion search over all reference frames, coded into p_buffer;
 *   - intra: mean colour of the block, coded into i_buffer;
 *   - split: recursion into the four children (unless already at
 *     block_max_depth).
 * Inter and intra are coded into scratch range coders with private copies
 * of the block state; the chosen candidate's bytes and state are then copied
 * back into s->c and s->block_state, and set_blocks() records the decision.
 * NOTE(review): many lines (declarations, branch structure, returns) are not
 * shown here; the mapping of the final statements to the intra and inter
 * outcomes is inferred from the buffers they copy - confirm.
 */
1783 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* Scratch output and coder state for the inter (p_) and intra (i_) candidates. */
1784 uint8_t p_buffer[1024];
1785 uint8_t i_buffer[1024];
1786 uint8_t p_state[sizeof(s->block_state)];
1787 uint8_t i_state[sizeof(s->block_state)];
/* Current write position of the real coder, restored or advanced at the end. */
1789 uint8_t *pbbak= s->c.bytestream;
1790 uint8_t *pbbak_start= s->c.bytestream_start;
1791 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
/* w, h: size of the block array at the finest level. */
1792 const int w= s->b_width << s->block_max_depth;
1793 const int h= s->b_height << s->block_max_depth;
1794 const int rem_depth= s->block_max_depth - level;
1795 const int index= (x + y*w) << rem_depth;
/* Block edge length in pixels at this level. */
1796 const int block_w= 1<<(LOG2_MB_SIZE - level);
1797 int trx= (x+1)<<rem_depth;
1798 int try= (y+1)<<rem_depth;
/* Neighbouring blocks; null_block stands in outside the picture. */
1799 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1800 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1801 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1802 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1803 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1804 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Colour predictors are taken from the left neighbour. */
1805 int pl = left->color[0];
1806 int pcb= left->color[1];
1807 int pcr= left->color[2];
1811 const int stride= s->current_picture.linesize[0];
1812 const int uvstride= s->current_picture.linesize[1];
/* Top-left pixel of this block in each input plane (chroma at half the offset). */
1813 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1814 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1815 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1817 int16_t last_mv[3][2];
1818 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1819 const int shift= 1+qpel;
1820 MotionEstContext *c= &s->m.me;
/* Coder contexts derived from the neighbours. */
1821 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1822 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1823 int my_context= av_log2(2*FFABS(left->my - top->my));
1824 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1825 int ref, best_ref, ref_score, ref_mx, ref_my;
1827 assert(sizeof(s->block_state) >= 256);
1829 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1833 // clip predictors / edge ?
/* Motion vector predictors from the neighbours. */
1835 P_LEFT[0]= left->mx;
1836 P_LEFT[1]= left->my;
1839 P_TOPRIGHT[0]= tr->mx;
1840 P_TOPRIGHT[1]= tr->my;
/* Additional search candidates: this block's stored vector and those of the right and bottom neighbours. */
1842 last_mv[0][0]= s->block[index].mx;
1843 last_mv[0][1]= s->block[index].my;
1844 last_mv[1][0]= right->mx;
1845 last_mv[1][1]= right->my;
1846 last_mv[2][0]= bottom->mx;
1847 last_mv[2][1]= bottom->my;
1854 assert(c-> stride == stride);
1855 assert(c->uvstride == uvstride);
1857 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1858 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1859 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1860 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* Search window relative to the block: the picture area plus a 14 pixel (16-2) margin. */
1862 c->xmin = - x*block_w - 16+2;
1863 c->ymin = - y*block_w - 16+2;
1864 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1865 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* Clip the predictors to the search window (window limits are scaled by `shift`). */
1867 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1868 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1869 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1870 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1871 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1872 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1873 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1875 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1876 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* NOTE(review): the condition choosing between the left and the median predictor is not shown here. */
1879 c->pred_x= P_LEFT[0];
1880 c->pred_y= P_LEFT[1];
1882 c->pred_x = P_MEDIAN[0];
1883 c->pred_y = P_MEDIAN[1];
/* Motion search in every reference frame; the best score wins. */
1888 for(ref=0; ref<s->ref_frames; ref++){
1889 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1891 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1892 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1894 assert(ref_mx >= c->xmin);
1895 assert(ref_mx <= c->xmax);
1896 assert(ref_my >= c->ymin);
1897 assert(ref_my <= c->ymax);
/* Sub-pel refinement, then the final block score plus a cost for the reference index. */
1899 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1900 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1901 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* Keep per-reference results when storage for them has been allocated. */
1902 if(s->ref_mvs[ref]){
1903 s->ref_mvs[ref][index][0]= ref_mx;
1904 s->ref_mvs[ref][index][1]= ref_my;
1905 s->ref_scores[ref][index]= ref_score;
1907 if(score > ref_score){
1914 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
/* Bits already pending in the coder; subtracted from the candidates' bit counts below. */
1917 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* ---- inter candidate: coded into p_buffer with a private state copy ---- */
1919 pc.bytestream_start=
1920 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1921 memcpy(p_state, s->block_state, sizeof(s->block_state));
1923 if(level!=s->block_max_depth)
1924 put_rac(&pc, &p_state[4 + s_context], 1);
1925 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1926 if(s->ref_frames > 1)
1927 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1928 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1929 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1930 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1931 p_len= pc.bytestream - pc.bytestream_start;
1932 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
/* ---- intra candidate: rounded mean of each plane; the distortion is
 * sum(p^2) - 2*l*sum(p) + l*l*N, i.e. the squared error against the luma mean ---- */
1934 block_s= block_w*block_w;
1935 sum = pix_sum(current_data[0], stride, block_w);
1936 l= (sum + block_s/2)/block_s;
1937 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* Chroma means; the chroma distortion terms are commented out. */
1939 block_s= block_w*block_w>>2;
1940 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1941 cb= (sum + block_s/2)/block_s;
1942 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1943 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1944 cr= (sum + block_s/2)/block_s;
1945 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1948 ic.bytestream_start=
1949 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1950 memcpy(i_state, s->block_state, sizeof(s->block_state));
1951 if(level!=s->block_max_depth)
1952 put_rac(&ic, &i_state[4 + s_context], 1);
1953 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
/* Colours are coded as differences to the left neighbour's colours. */
1954 put_symbol(&ic, &i_state[32], l-pl , 1);
1955 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1956 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1957 i_len= ic.bytestream - ic.bytestream_start;
1958 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1960 // assert(score==256*256*256*64-1);
1961 assert(iscore < 255*255*256 + s->lambda2*10);
1962 assert(iscore >= 0);
1963 assert(l>=0 && l<=255);
1964 assert(pl>=0 && pl<=255);
/* Scene change statistics from the two scores.
 * NOTE(review): the enclosing condition and the branch structure are not shown here. */
1967 int varc= iscore >> 8;
1968 int vard= score >> 8;
1969 if (vard <= 64 || vard < varc)
1970 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1972 c->scene_change_score+= s->m.qscale;
/* ---- split candidate: code "split" and recurse into the four children ---- */
1975 if(level!=s->block_max_depth){
1976 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1977 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1978 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1979 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1980 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1981 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1983 if(score2 < score && score2 < iscore)
/* Intra outcome: i_buffer and i_state are committed to the real coder. */
1988 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1989 memcpy(pbbak, i_buffer, i_len);
1991 s->c.bytestream_start= pbbak_start;
1992 s->c.bytestream= pbbak + i_len;
1993 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1994 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* Inter outcome: p_buffer and p_state are committed to the real coder. */
1997 memcpy(pbbak, p_buffer, p_len);
1999 s->c.bytestream_start= pbbak_start;
2000 s->c.bytestream= pbbak + p_len;
2001 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2002 memcpy(s->block_state, p_state, sizeof(s->block_state));
/*
 * Returns non-zero when blocks a and b carry the same coding decision.
 *
 * Two intra blocks are equal when all three colour components match.
 * Otherwise the blocks are equal when mx, my and ref match and both have
 * the same BLOCK_INTRA flag. The differences are OR-ed together so that a
 * single test against zero decides.
 */
2007 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2008 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2009 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2011 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/*
 * Writes the already decided block tree at (x, y), level `level`, to the
 * range coder s->c. No mode search happens here: the decisions are read
 * from s->block.
 *
 * Below block_max_depth a "no split" flag (1) is coded when the block at
 * `index` equals its neighbours at +1, +w and +w+1; otherwise a "split"
 * flag (0) is coded and the four children are written recursively.
 * A leaf is coded either as intra (colour differences to the left
 * neighbour) or as inter (optional reference index, then motion vector
 * differences to the prediction).
 * NOTE(review): the else/return structure between the split test and the
 * leaf coding is not shown here - confirm.
 */
2015 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2016 const int w= s->b_width << s->block_max_depth;
2017 const int rem_depth= s->block_max_depth - level;
2018 const int index= (x + y*w) << rem_depth;
2019 int trx= (x+1)<<rem_depth;
2020 BlockNode *b= &s->block[index];
/* Neighbouring blocks; null_block stands in outside the picture. */
2021 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2022 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2023 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2024 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Colour predictors from the left neighbour. */
2025 int pl = left->color[0];
2026 int pcb= left->color[1];
2027 int pcr= left->color[2];
/* Coder contexts; the mv contexts get an extra 16 when b->ref is non-zero. */
2029 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2030 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2031 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2035 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2039 if(level!=s->block_max_depth){
2040 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2041 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2043 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2044 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2045 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2046 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2047 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* Leaf, intra: type flag 1, then the three colour differences. */
2051 if(b->type & BLOCK_INTRA){
2052 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2053 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2054 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2055 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2056 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2057 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* Leaf, inter: type flag 0, optional reference index, then the mv differences. */
2059 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2060 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2061 if(s->ref_frames > 1)
2062 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2063 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2064 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2065 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Read the block at (x, y) of the given tree level from the range coder s->c
 * and store it with set_blocks().
 * A leaf is decoded when the maximum depth is reached or the split flag read
 * from the bitstream is set; otherwise the four children on level+1 are
 * decoded recursively.
 */
2069 static void decode_q_branch(SnowContext *s, int level, int x, int y){
// w is the row stride of s->block; index addresses the node of this block
2070 const int w= s->b_width << s->block_max_depth;
2071 const int rem_depth= s->block_max_depth - level;
2072 const int index= (x + y*w) << rem_depth;
2073 int trx= (x+1)<<rem_depth;
// neighbours outside the array are replaced by null_block (left/top) or by
// another neighbour (tl falls back to left, tr falls back to tl)
2074 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2075 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2076 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2077 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2078 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): presumably executed only under a condition that is not
// visible on the surrounding lines — confirm
2081 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
// the flag is only read when level is above the maximum depth (short-circuit)
2085 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// colors start from the left neighbour; the decoded deltas are added below
2087 int l = left->color[0];
2088 int cb= left->color[1];
2089 int cr= left->color[2];
2091 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
// the terms multiplied by 0 do not contribute to the context
2092 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2093 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
// NOTE(review): encode_q_branch2() masks the neighbour types with &1 for this
// context while this line does not — confirm type can only be 0 or 1 here
2095 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// intra: motion vector taken from the prediction, colors = left + delta
2098 pred_mv(s, &mx, &my, 0, left, top, tr);
2099 l += get_symbol(&s->c, &s->block_state[32], 1);
2100 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2101 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// inter: reference index is only coded when several reference frames exist;
// the motion vector is the prediction plus the decoded delta
2103 if(s->ref_frames > 1)
2104 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2105 pred_mv(s, &mx, &my, ref, left, top, tr);
2106 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2107 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2109 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// not a leaf: decode the four children
2111 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2112 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2113 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2114 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encode level-0 block trees into the range coder.
 * encode_q_branch2() (which writes the stored blocks as they are) is used
 * when me_method is ME_ITER or search is 0, encode_q_branch() otherwise.
 * An error is logged when fewer than w*MB_SIZE*MB_SIZE*3 bytes are left in
 * the output buffer.
 * @param search 0 to only write the blocks already stored in the context
 * NOTE(review): x and y presumably iterate over the whole block grid — confirm.
 */
2118 static void encode_blocks(SnowContext *s, int search){
// non-keyframes with iterative motion estimation get an extra step first
2123 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2127 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2128 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2132 if(s->avctx->me_method == ME_ITER || !search)
2133 encode_q_branch2(s, 0, x, y);
2135 encode_q_branch (s, 0, x, y);
/**
 * Decode level-0 block trees with decode_q_branch().
 * NOTE(review): x and y presumably iterate over the whole block grid — confirm.
 */
2140 static void decode_blocks(SnowContext *s){
2147 decode_q_branch(s, 0, x, y);
/**
 * Motion compensate one b_w x b_h block from src into dst for the sub-pel
 * offset (dx, dy); both offsets are asserted to be below 16.
 * Three filter passes are visible: one with horizontal taps on src, one with
 * vertical taps on src and one with vertical taps on the int16 buffer tmpI.
 * dst is then built from the planes in hpel[], either bilinearly from four of
 * them or as a weighted mix of the two planes selected through brane[].
 * @param p plane providing hcoeff, fast_mc and diag_mc; may be NULL, in which
 *          case the fixed 20,-5,1 filter is used
 */
2152 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2153 const static uint8_t weight[64]={
// indexed by dx + 16*dy; each entry packs two hpel[] indices:
// high nibble -> l, low nibble -> r (see below)
2164 const static uint8_t brane[256]={
2165 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2166 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2167 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2168 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2169 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2170 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2171 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2172 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2173 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2174 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2175 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2176 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2177 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2178 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2179 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2180 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
// indexed by an hpel[] index; the two lookups are OR-ed into b below
2183 const static uint8_t needs[16]={
// scratch: tmpIt has rows of 64 int16 values, tmp2t holds three byte planes
// whose size depends on stride (variable length array)
2191 int16_t tmpIt [64*(32+HTAPS_MAX)];
2192 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2193 int16_t *tmpI= tmpIt;
2194 uint8_t *tmp2= tmp2t[0];
2197 assert(dx<16 && dy<16);
2198 r= brane[dx + 16*dy]&15;
2199 l= brane[dx + 16*dy]>>4;
2201 b= needs[l] | needs[r];
2202 if(p && !p->diag_mc)
// pass with horizontal taps: a_1..a6 are eight neighbouring pixels of a row;
// b_h+HTAPS_MAX-1 rows are processed
2206 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2207 for(x=0; x < b_w; x++){
2208 int a_1=src[x + HTAPS_MAX/2-4];
2209 int a0= src[x + HTAPS_MAX/2-3];
2210 int a1= src[x + HTAPS_MAX/2-2];
2211 int a2= src[x + HTAPS_MAX/2-1];
2212 int a3= src[x + HTAPS_MAX/2+0];
2213 int a4= src[x + HTAPS_MAX/2+1];
2214 int a5= src[x + HTAPS_MAX/2+2];
2215 int a6= src[x + HTAPS_MAX/2+3];
// without a plane or with fast_mc the fixed symmetric 6-tap filter is used,
// otherwise the 8-tap filter built from the plane's hcoeff[0..3]
2217 if(!p || p->fast_mc){
2218 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2222 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
// if am is outside 0..255: ~(am>>31) is 0 for negative am and all ones for
// positive am (arithmetic shift assumed), i.e. 0 or 255 in the low 8 bits
2227 if(am&(~255)) am= ~(am>>31);
2236 src += HTAPS_MAX/2 - 1;
// pass with vertical taps on src: the eight taps are stride bytes apart;
// one extra column (b_w+1) is processed
2240 for(y=0; y < b_h; y++){
2241 for(x=0; x < b_w+1; x++){
2242 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2243 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2244 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2245 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2246 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2247 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2248 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2249 int a6= src[x + (HTAPS_MAX/2+3)*stride];
// rounding and normalisation: >>5 for the fixed filter, >>6 for hcoeff
2251 if(!p || p->fast_mc)
2252 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2254 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2256 if(am&(~255)) am= ~(am>>31);
2264 src += stride*(HTAPS_MAX/2 - 1);
// pass with vertical taps on tmpI (rows are 64 entries apart); the shifts are
// twice those of a single pass (>>10 and >>12)
// NOTE(review): tmpI presumably holds the unnormalised results of the
// horizontal pass — confirm
2268 for(y=0; y < b_h; y++){
2269 for(x=0; x < b_w; x++){
2270 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2271 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2272 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2273 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2274 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2275 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2276 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2277 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2279 if(!p || p->fast_mc)
2280 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2282 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2283 if(am&(~255)) am= ~(am>>31);
// hpel[] is a table of plane pointers laid out four per row; the entries
// visible here point into src and the tmp2t planes, shifted by one pixel
// and/or one row
2292 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2297 hpel[ 6]= tmp2t[1] + 1;
2299 hpel[ 8]= src + stride;
2300 hpel[ 9]= hpel[1] + stride;
2301 hpel[10]= hpel[8] + 1;
// bilinear variant: the four planes surrounding the position are mixed with
// weights that sum to 64 (rounded, >>6)
2304 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2305 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2306 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2307 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2310 for(y=0; y < b_h; y++){
2311 for(x=0; x < b_w; x++){
2312 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2313 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
// two-plane variant: planes l and r from brane[] are mixed with the weight a
// taken from weight[] (rounded, >>3)
2322 uint8_t *src1= hpel[l];
2323 uint8_t *src2= hpel[r];
2324 int a= weight[((dx&7) + (8*(dy&7)))];
2326 for(y=0; y < b_h; y++){
2327 for(x=0; x < b_w; x++){
2328 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2335 STOP_TIMER("mc_block")
/**
 * Defines mc_block_hpel<dx><dy><b_w>(), a wrapper that runs mc_block() with a
 * NULL plane on a b_w x b_w block. src is moved back by HTAPS_MAX/2-1 columns
 * and rows before the call and a scratch array of stride*(b_w+HTAPS_MAX-1)
 * bytes is passed as tmp.
 */
2338 #define mca(dx,dy,b_w)\
2339 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2340 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2342 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction of one b_w x b_h block into dst.
 * An intra block is filled with block->color[plane_index]. Any other block is
 * motion compensated from s->last_picture[block->ref] with the block's motion
 * vector, either through mc_block() or through the h264 qpel functions of
 * s->dsp.
 * @param tmp    scratch buffer, used for edge emulation and by mc_block()
 * @param sx,sy  position of the block in the plane
 * @param w,h    plane dimensions used for the edge check
 */
2354 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2355 if(block->type & BLOCK_INTRA){
2357 const int color = block->color[plane_index];
// replicates the color into all four bytes of a 32 bit word
// (assumes color is within 0..255 — TODO confirm)
2358 const int color4= color*0x01010101;
// eight 32 bit stores per row = 32 pixels
2360 for(y=0; y < b_h; y++){
2361 *(uint32_t*)&dst[0 + y*stride]= color4;
2362 *(uint32_t*)&dst[4 + y*stride]= color4;
2363 *(uint32_t*)&dst[8 + y*stride]= color4;
2364 *(uint32_t*)&dst[12+ y*stride]= color4;
2365 *(uint32_t*)&dst[16+ y*stride]= color4;
2366 *(uint32_t*)&dst[20+ y*stride]= color4;
2367 *(uint32_t*)&dst[24+ y*stride]= color4;
2368 *(uint32_t*)&dst[28+ y*stride]= color4;
// four stores per row = 16 pixels
2371 for(y=0; y < b_h; y++){
2372 *(uint32_t*)&dst[0 + y*stride]= color4;
2373 *(uint32_t*)&dst[4 + y*stride]= color4;
2374 *(uint32_t*)&dst[8 + y*stride]= color4;
2375 *(uint32_t*)&dst[12+ y*stride]= color4;
// two stores per row = 8 pixels
2378 for(y=0; y < b_h; y++){
2379 *(uint32_t*)&dst[0 + y*stride]= color4;
2380 *(uint32_t*)&dst[4 + y*stride]= color4;
// one store per row = 4 pixels
2383 for(y=0; y < b_h; y++){
2384 *(uint32_t*)&dst[0 + y*stride]= color4;
// generic byte-wise fill for any width
2387 for(y=0; y < b_h; y++){
2388 for(x=0; x < b_w; x++){
2389 dst[x + y*stride]= color;
2394 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// plane 0 uses twice the motion vector scale of the other planes
2395 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2396 int mx= block->mx*scale;
2397 int my= block->my*scale;
// the low 4 bits are the sub-pel offset, the rest is the whole-pixel shift
2398 const int dx= mx&15;
2399 const int dy= my&15;
2400 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// move to the first pixel the filters read (HTAPS_MAX/2-1 before the block)
2401 sx += (mx>>4) - (HTAPS_MAX/2-1);
2402 sy += (my>>4) - (HTAPS_MAX/2-1);
2403 src += sx + sy*stride;
// the unsigned compare also catches negative positions; when the filter
// support leaves the plane an edge-extended copy is built in tmp + MB_SIZE
2404 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2405 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2406 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2409 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2410 // assert(!(b_w&(b_w-1)));
2411 assert(b_w>1 && b_h>1);
2412 assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles offsets that are not multiples of 4, block shapes other
// than 1:1, 1:2 or 2:1, widths that are not a power of two and planes
// without fast_mc; everything else goes to the h264 qpel functions
2413 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2414 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
// two 16 pixel wide calls per group of 16 rows cover a 32 pixel wide block
2417 for(y=0; y<b_h; y+=16){
2418 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2419 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2422 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
// block twice as wide as high: two calls side by side
2423 else if(b_w==2*b_h){
2424 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2425 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
// remaining shape: two calls stacked vertically, b_w rows apart
2428 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2429 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Combine four predicted blocks with their OBMC weights and apply the result
 * to the lines of a slice buffer.
 * For every pixel the four block[] predictions are weighted by the four
 * quadrants of the obmc window and summed. The visible statements then either
 * add the slice buffer value, round, clip and store the pixel to dst8, or
 * subtract the weighted sum from the slice buffer line.
 * @param block      four prediction planes, indexed 0..3
 * @param src_x,src_y position of the block inside the slice buffer
 * @param src_stride row stride of block[] and dst8
 * @param add        NOTE(review): presumably selects between the two
 *                   behaviours described above — confirm
 */
2434 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2435 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2438 for(y=0; y<b_h; y++){
2439 //FIXME ugly misuse of obmc_stride
// obmc1..obmc4 address the same row in the four quadrants of the window
// (half a stride to the right and/or half a stride of rows further down)
2440 const uint8_t *obmc1= obmc + y*obmc_stride;
2441 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2442 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2443 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2444 dst = slice_buffer_get_line(sb, src_y + y);
2445 for(x=0; x<b_w; x++){
2446 int v= obmc1[x] * block[3][x + y*src_stride]
2447 +obmc2[x] * block[2][x + y*src_stride]
2448 +obmc3[x] * block[1][x + y*src_stride]
2449 +obmc4[x] * block[0][x + y*src_stride];
// bring the weighted sum to FRAC_BITS fractional bits
2451 v <<= 8 - LOG2_OBMC_MAX;
2453 v >>= 8 - FRAC_BITS;
// reconstruction: add the buffer value, round, clip to 0..255, store pixel
2456 v += dst[x + src_x];
2457 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2458 if(v&(~255)) v= ~(v>>31);
2459 dst8[x + y*src_stride] = v;
// otherwise the prediction is removed from the buffer
2461 dst[x + src_x] -= v;
2467 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Apply the overlapped prediction for one block position.
 * The four block nodes lt, rt, lb and rb around (b_x, b_y) are predicted with
 * pred_block() (identical nodes are detected with same_block()), and the
 * predictions are weighted with the obmc window. The weighted sum is applied
 * either to a slice buffer through s->dsp.inner_add_yblock() or to dst/dst8.
 * The block rectangle is clipped against the plane (w, h) first; nothing is
 * done when the clipped size is not positive.
 * @param sliced     non-zero when sb is used (see the !sliced tests below)
 * @param dst        coefficient buffer
 * @param dst8       8 bit output picture
 * @param src_x,src_y position of the block in the plane, may be negative
 * @param add        see the per-pixel code: add to or subtract from dst
 * @param offset_dst selects where the dst pointer is moved by the block
 *                   position (see the two tests on it below)
 */
2468 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2469 const int b_width = s->b_width << s->block_max_depth;
2470 const int b_height= s->b_height << s->block_max_depth;
2471 const int b_stride= b_width;
// the four nodes whose predictions overlap in this block: left/right top,
// left/right bottom
2472 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473 BlockNode *rt= lt+1;
2474 BlockNode *lb= lt+b_stride;
2475 BlockNode *rb= lb+1;
2477 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// positions on the right / bottom border of the block array
2485 }else if(b_x + 1 >= b_width){
2492 }else if(b_y + 1 >= b_height){
// clipping of the block rectangle against the plane
2497 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2500 if(!sliced && !offset_dst)
2503 }else if(src_x + b_w > w){
2507 obmc -= src_y*obmc_stride;
2509 if(!sliced && !offset_dst)
2510 dst -= src_y*dst_stride;
2512 }else if(src_y + b_h> h){
2516 if(b_w<=0 || b_h<=0) return;
2518 assert(src_stride > 2*MB_SIZE + 5);
2519 if(!sliced && offset_dst)
2520 dst += src_x + src_y*dst_stride;
2521 dst8+= src_x + src_y*src_stride;
2522 // src += src_x + src_y*src_stride;
2524 ptmp= tmp + 3*tmp_step;
// predict the four nodes; a node that equals an earlier one does not take
// the pred_block() call of its own
2527 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2529 if(same_block(lt, rt)){
2534 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2537 if(same_block(lt, lb)){
2539 }else if(same_block(rt, lb)){
2544 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2547 if(same_block(lt, rb) ){
2549 }else if(same_block(rt, rb)){
2551 }else if(same_block(lb, rb)){
2555 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// variant with one pass per prediction: each pass weights one block[] plane
// with one quadrant of the obmc window and adds it to / subtracts it from dst
// NOTE(review): which of the variants below is compiled/executed is decided
// on lines not visible here — confirm
2558 for(y=0; y<b_h; y++){
2559 for(x=0; x<b_w; x++){
2560 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2561 if(add) dst[x + y*dst_stride] += v;
2562 else dst[x + y*dst_stride] -= v;
2565 for(y=0; y<b_h; y++){
2566 uint8_t *obmc2= obmc + (obmc_stride>>1);
2567 for(x=0; x<b_w; x++){
2568 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2569 if(add) dst[x + y*dst_stride] += v;
2570 else dst[x + y*dst_stride] -= v;
2573 for(y=0; y<b_h; y++){
2574 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2575 for(x=0; x<b_w; x++){
2576 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2577 if(add) dst[x + y*dst_stride] += v;
2578 else dst[x + y*dst_stride] -= v;
2581 for(y=0; y<b_h; y++){
2582 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2583 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2584 for(x=0; x<b_w; x++){
2585 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2586 if(add) dst[x + y*dst_stride] += v;
2587 else dst[x + y*dst_stride] -= v;
// slice buffer variant, delegated to the dsp function pointer
2594 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2595 STOP_TIMER("inner_add_yblock")
// plain buffer variant: same arithmetic as ff_snow_inner_add_yblock() but
// addressing dst with dst_stride instead of slice buffer lines
2597 for(y=0; y<b_h; y++){
2598 //FIXME ugly misuse of obmc_stride
2599 const uint8_t *obmc1= obmc + y*obmc_stride;
2600 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2601 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2602 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2603 for(x=0; x<b_w; x++){
2604 int v= obmc1[x] * block[3][x + y*src_stride]
2605 +obmc2[x] * block[2][x + y*src_stride]
2606 +obmc3[x] * block[1][x + y*src_stride]
2607 +obmc4[x] * block[0][x + y*src_stride];
// bring the weighted sum to FRAC_BITS fractional bits
2609 v <<= 8 - LOG2_OBMC_MAX;
2611 v >>= 8 - FRAC_BITS;
// reconstruction: add the buffer value, round, clip to 0..255, store pixel
2614 v += dst[x + y*dst_stride];
2615 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2616 if(v&(~255)) v= ~(v>>31);
2617 dst8[x + y*src_stride] = v;
// otherwise the prediction is removed from the buffer
2619 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction for block row mb_y of one plane, working on a slice
 * buffer.
 * For keyframes (or when bit 512 of avctx->debug is set) no motion prediction
 * is done: the visible code either converts the buffer lines to 8 bit pixels
 * with a +128 offset or subtracts that offset from the lines. Otherwise
 * add_yblock() is called in sliced mode for mb_x = 0..mb_w inclusive.
 * @param add passed on to add_yblock()
 */
2626 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2627 Plane *p= &s->plane[plane_index];
2628 const int mb_w= s->b_width << s->block_max_depth;
2629 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
2631 int block_size = MB_SIZE >> s->block_max_depth;
2632 int block_w = plane_index ? block_size/2 : block_size;
2633 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2634 int obmc_stride= plane_index ? block_size : 2*block_size;
2635 int ref_stride= s->current_picture.linesize[plane_index];
2636 uint8_t *dst8= s->current_picture.data[plane_index];
2641 if(s->keyframe || (s->avctx->debug&512)){
// lines of this block row, limited to the plane height
2646 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2648 // DWTELEM * line = slice_buffer_get_line(sb, y);
2649 IDWTELEM * line = sb->line[y];
2652 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// add the 128 offset and the rounding term in FRAC_BITS fixed point
2653 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655 if(v&(~255)) v= ~(v>>31);
2656 dst8[x + y*ref_stride]= v;
2660 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2662 // DWTELEM * line = slice_buffer_get_line(sb, y);
2663 IDWTELEM * line = sb->line[y];
2666 line[x] -= 128 << FRAC_BITS;
2667 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; each block starts half a block before its
// grid position
2675 for(mb_x=0; mb_x<=mb_w; mb_x++){
2678 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2679 block_w*mb_x - block_w/2,
2680 block_w*mb_y - block_w/2,
2683 w, ref_stride, obmc_stride,
2685 add, 0, plane_index);
2687 STOP_TIMER("add_yblock")
2690 STOP_TIMER("predict_slice")
/**
 * Apply the prediction for block row mb_y of one plane, working on the plain
 * coefficient buffer buf (row stride w).
 * For keyframes (or when bit 512 of avctx->debug is set) no motion prediction
 * is done: the visible code either converts buf to 8 bit pixels with a +128
 * offset or subtracts that offset from buf. Otherwise add_yblock() is called
 * in non-sliced mode for mb_x = 0..mb_w inclusive.
 * @param add passed on to add_yblock()
 */
2693 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2694 Plane *p= &s->plane[plane_index];
2695 const int mb_w= s->b_width << s->block_max_depth;
2696 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
2698 int block_size = MB_SIZE >> s->block_max_depth;
2699 int block_w = plane_index ? block_size/2 : block_size;
2700 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2701 const int obmc_stride= plane_index ? block_size : 2*block_size;
2702 int ref_stride= s->current_picture.linesize[plane_index];
2703 uint8_t *dst8= s->current_picture.data[plane_index];
2708 if(s->keyframe || (s->avctx->debug&512)){
// lines of this block row, limited to the plane height
2713 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// add the 128 offset and the rounding term in FRAC_BITS fixed point
2715 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2717 if(v&(~255)) v= ~(v>>31);
2718 dst8[x + y*ref_stride]= v;
2722 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2724 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; each block starts half a block before its
// grid position
2732 for(mb_x=0; mb_x<=mb_w; mb_x++){
2735 add_yblock(s, 0, NULL, buf, dst8, obmc,
2736 block_w*mb_x - block_w/2,
2737 block_w*mb_y - block_w/2,
2740 w, ref_stride, obmc_stride,
2742 add, 1, plane_index);
2744 STOP_TIMER("add_yblock")
2747 STOP_TIMER("predict_slice")
/**
 * Run predict_slice() for every block row of a plane, mb_y = 0..mb_h
 * inclusive.
 */
2750 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2751 const int mb_h= s->b_height << s->block_max_depth;
2753 for(mb_y=0; mb_y<=mb_h; mb_y++)
2754 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Compute a color value for block (mb_x, mb_y) of one plane.
 * The block is temporarily made intra with color 0, the prediction around it
 * is accumulated into a scratch buffer with add_yblock(), and the difference
 * between the input picture and that prediction is correlated with the obmc
 * weights (ab), normalised by the sum of squared weights (aa).
 * @return ((ab<<LOG2_OBMC_MAX) + aa/2)/aa clipped to 0..255
 */
2757 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2759 Plane *p= &s->plane[plane_index];
2760 const int block_size = MB_SIZE >> s->block_max_depth;
2761 const int block_w = plane_index ? block_size/2 : block_size;
2762 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2763 const int obmc_stride= plane_index ? block_size : 2*block_size;
2764 const int ref_stride= s->current_picture.linesize[plane_index];
2765 uint8_t *src= s-> input_picture.data[plane_index];
// per-plane region of the shared scratchpad, reused as coefficient buffer
2766 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2767 const int b_stride = s->b_width << s->block_max_depth;
2768 const int w= p->width;
2769 const int h= p->height;
2770 int index= mb_x + mb_y*b_stride;
2771 BlockNode *b= &s->block[index];
// copy of the node taken before it is modified below
2772 BlockNode backup= *b;
// make this block contribute nothing: intra with color 0
2776 b->type|= BLOCK_INTRA;
2777 b->color[plane_index]= 0;
2778 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// i selects one of the positions (mb_x-1..mb_x, mb_y-1..mb_y)
2781 int mb_x2= mb_x + (i &1) - 1;
2782 int mb_y2= mb_y + (i>>1) - 1;
2783 int x= block_w*mb_x2 + block_w/2;
2784 int y= block_w*mb_y2 + block_w/2;
// add=0: the prediction is subtracted from dst, hence the negation below
2786 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2787 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// only pixels inside the plane are visited
2789 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2790 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
// position relative to the top-left corner of this block's obmc window
2791 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2792 int obmc_v= obmc[index];
// at picture borders the weight of the missing neighbour is added to
// this block's weight
2794 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2795 if(x<0) obmc_v += obmc[index + block_w];
2796 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2797 if(x+block_w>w) obmc_v += obmc[index - block_w];
2798 //FIXME precalc this or simplify it somehow else
2800 d = -dst[index] + (1<<(FRAC_BITS-1));
2802 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2803 aa += obmc_v * obmc_v; //FIXME precalculate this
2809 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Estimate the cost of coding block (x, y) from log2 magnitudes.
 * Intra blocks are charged for the color differences to the left neighbour,
 * other blocks for the motion vector difference to the pred_mv() prediction
 * and for the reference index.
 * @param w offset, in blocks, from this block to its top-right neighbour
 * @return the estimate; the value returned for positions outside the block
 *         array is set on a line not visible here
 */
2812 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2813 const int b_stride = s->b_width << s->block_max_depth;
2814 const int b_height = s->b_height<< s->block_max_depth;
2815 int index= x + y*b_stride;
2816 const BlockNode *b = &s->block[index];
// neighbours outside the array are replaced by null_block (left/top) or by
// another neighbour (tl falls back to left, tr falls back to tl)
2817 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2818 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2819 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2820 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2822 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2823 // int my_context= av_log2(2*FFABS(left->my - top->my));
// positions outside the block array are rejected before any node is read
2825 if(x<0 || x>=b_stride || y>=b_height)
// NOTE(review): the next line looks like a row of a code length table whose
// comment delimiters are missing — confirm
2832 00001XXXX 15-30 8-15
2834 //FIXME try accurate rate
2835 //FIXME intra and inter predictors if surrounding blocks aren't the same type
2836 if(b->type & BLOCK_INTRA){
2837 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2838 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2839 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2841 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2844 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2845 + av_log2(2*FFABS(dmy))
2846 + av_log2(2*b->ref));
/**
 * Compute the rate-distortion cost of the current contents of block
 * (mb_x, mb_y) in one plane.
 * The block is predicted over its 2*block_w square, weighted with
 * obmc_edged, combined with the neighbours' contribution from the
 * obmc_scratchpad and compared with the input picture; the bits of the
 * affected blocks are estimated with get_block_bits().
 * @param obmc_edged obmc window with row stride obmc_stride
 * @return distortion + rate*penalty_factor
 */
2850 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2851 Plane *p= &s->plane[plane_index];
2852 const int block_size = MB_SIZE >> s->block_max_depth;
2853 const int block_w = plane_index ? block_size/2 : block_size;
2854 const int obmc_stride= plane_index ? block_size : 2*block_size;
2855 const int ref_stride= s->current_picture.linesize[plane_index];
2856 uint8_t *dst= s->current_picture.data[plane_index];
2857 uint8_t *src= s-> input_picture.data[plane_index];
// same scratchpad region that get_dc() uses as its dst
2858 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// stack scratch whose size depends on the picture stride
2859 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2860 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2861 const int b_stride = s->b_width << s->block_max_depth;
2862 const int b_height = s->b_height<< s->block_max_depth;
2863 const int w= p->width;
2864 const int h= p->height;
2867 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// top-left corner of the 2*block_w square and its part inside the plane
2868 int sx= block_w*mb_x - block_w/2;
2869 int sy= block_w*mb_y - block_w/2;
2870 int x0= FFMAX(0,-sx);
2871 int y0= FFMAX(0,-sy);
2872 int x1= FFMIN(block_w*2, w-sx);
2873 int y1= FFMIN(block_w*2, h-sy);
2876 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
// weight this block's prediction, add pred and clip to 0..255
2878 for(y=y0; y<y1; y++){
2879 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2880 const IDWTELEM *pred1 = pred + y*obmc_stride;
2881 uint8_t *cur1 = cur + y*ref_stride;
2882 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2883 for(x=x0; x<x1; x++){
2884 #if FRAC_BITS >= LOG2_OBMC_MAX
2885 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2887 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2889 v = (v + pred1[x]) >> FRAC_BITS;
2890 if(v&(~255)) v= ~(v>>31);
2895 /* copy the regions where obmc[] = (uint8_t)256 */
// only relevant for the blocks in the four corners of the block array
2896 if(LOG2_OBMC_MAX == 8
2897 && (mb_x == 0 || mb_x == b_stride-1)
2898 && (mb_y == 0 || mb_y == b_height-1)){
2907 for(y=y0; y<y1; y++)
2908 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2912 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2913 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2914 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2915 * so improving the score of one block is not strictly guaranteed to
2916 * improve the score of the whole frame, so iterative motion est
2917 * doesn't always converge. */
// the wavelet comparisons are called directly on 32 lines
2918 if(s->avctx->me_cmp == FF_CMP_W97)
2919 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2920 else if(s->avctx->me_cmp == FF_CMP_W53)
2921 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// other comparison functions: 16x16 pieces selected by i, or one call over
// block_w*2 lines
2925 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2926 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2931 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// rate of this block and of blocks at the offsets derived from i
2940 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2942 if(mb_x == b_stride-2)
2943 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2945 return distortion + rate*penalty_factor;
/**
 * Compute the rate-distortion cost around the 2x2 block group whose top-left
 * block is (mb_x, mb_y).
 * The prediction is rebuilt with add_yblock() at the positions
 * (mb_x-1..mb_x+1, mb_y-1..mb_y+1) selected by i, compared with the input
 * picture, and the rate of the affected blocks is estimated with
 * get_block_bits().
 * @return distortion + rate*penalty_factor
 */
2948 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2950 Plane *p= &s->plane[plane_index];
2951 const int block_size = MB_SIZE >> s->block_max_depth;
2952 const int block_w = plane_index ? block_size/2 : block_size;
2953 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2954 const int obmc_stride= plane_index ? block_size : 2*block_size;
2955 const int ref_stride= s->current_picture.linesize[plane_index];
2956 uint8_t *dst= s->current_picture.data[plane_index];
2957 uint8_t *src= s-> input_picture.data[plane_index];
// all-zero coefficient buffer handed to add_yblock() with a stride of 0
2958 static const IDWTELEM zero_dst[4096]; //FIXME
2959 const int b_stride = s->b_width << s->block_max_depth;
2960 const int w= p->width;
2961 const int h= p->height;
2964 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i selects one position of the 3x3 neighbourhood
2967 int mb_x2= mb_x + (i%3) - 1;
2968 int mb_y2= mb_y + (i/3) - 1;
2969 int x= block_w*mb_x2 + block_w/2;
2970 int y= block_w*mb_y2 + block_w/2;
2972 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2973 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
// rows and columns of the block that lie outside the plane are copied from
// the input picture
2975 //FIXME find a cleaner/simpler way to skip the outside stuff
2976 for(y2= y; y2<0; y2++)
2977 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2978 for(y2= h; y2<y+block_w; y2++)
2979 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2981 for(y2= y; y2<y+block_w; y2++)
2982 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2985 for(y2= y; y2<y+block_w; y2++)
2986 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[1] is used for 8 pixel wide blocks, me_cmp[0] for 16
2989 assert(block_w== 8 || block_w==16);
2990 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2994 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2995 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3003 rate = get_block_bits(s, mb_x, mb_y, 2);
// when the four blocks are merged the first four table entries, which are the
// blocks of the group itself, are skipped
3004 for(i=merged?4:0; i<9; i++){
3005 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3006 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3009 return distortion + rate*penalty_factor;
/**
 * Evaluate a candidate for block (mb_x, mb_y) with get_block_rd() on plane 0.
 * @param p          candidate values; the visible code reads p[0..2] as the
 *                   three colors and p[0], p[1] for the me_cache lookup
 * @param intra      NOTE(review): presumably selects between the intra and
 *                   the non-intra set-up below — confirm
 * @param obmc_edged obmc window passed on to get_block_rd()
 * @param best_rd    NOTE(review): presumably updated when the candidate is
 *                   better; the update is not visible here — confirm
 */
3012 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3013 const int b_stride= s->b_width << s->block_max_depth;
3014 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// copy of the node taken before the candidate is written into it
3015 BlockNode backup= *block;
3016 int rd, index, value;
3018 assert(mb_x>=0 && mb_y>=0);
3019 assert(mb_x<b_stride);
// intra candidate: the three colors
3022 block->color[0] = p[0];
3023 block->color[1] = p[1];
3024 block->color[2] = p[2];
3025 block->type |= BLOCK_INTRA;
// me_cache: slot chosen from p[0] and p[1]; the stored value combines the
// cache generation with parts of p[0], p[1] and the reference index
3027 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3028 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3029 if(s->me_cache[index] == value)
3031 s->me_cache[index]= value;
3035 block->type &= ~BLOCK_INTRA;
3038 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3050 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Call check_block() with intra = 0, passing p0 and p1 as a two element
 * array.
 */
3051 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3052 int p[2] = {p0, p1};
3053 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Evaluate a non-intra candidate for the 2x2 group of blocks whose top-left
 * block is (mb_x, mb_y); both coordinates are asserted to be even.
 * The first block is copied to the other three blocks of the group, the cost
 * is computed with get_4block_rd() on plane 0, and the visible code restores
 * the four blocks from a backup.
 * @param p0,p1   candidate values, used for the me_cache lookup
 * @param best_rd NOTE(review): presumably updated when the candidate is
 *                better, in which case the restore is presumably skipped;
 *                neither is visible here — confirm
 */
3056 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3057 const int b_stride= s->b_width << s->block_max_depth;
3058 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// copies of the four nodes of the group
3059 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3060 int rd, index, value;
3062 assert(mb_x>=0 && mb_y>=0);
3063 assert(mb_x<b_stride);
3064 assert(((mb_x|mb_y)&1) == 0);
// me_cache: slot chosen from p0 and p1; the stored value combines the cache
// generation with parts of p0, p1 and the reference index
3066 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3067 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3068 if(s->me_cache[index] == value)
3070 s->me_cache[index]= value;
3075 block->type &= ~BLOCK_INTRA;
// replicate the candidate into the other three blocks of the group
3076 block[1]= block[b_stride]= block[b_stride+1]= *block;
3078 rd= get_4block_rd(s, mb_x, mb_y, 0);
// put the original four nodes back
3085 block[0]= backup[0];
3086 block[1]= backup[1];
3087 block[b_stride]= backup[2];
3088 block[b_stride+1]= backup[3];
// Iterative refinement of the per-block coding decisions (motion vector,
// reference, or DC color) for the current frame.
// The working grid is s->b_width x s->b_height scaled up by block_max_depth.
// The range coder is copied into r and block_state into a local buffer before
// encode_q_branch() is run on every top-level block; block_state is put back
// right after that sweep.
3093 static void iterative_me(SnowContext *s){
3094 int pass, mb_x, mb_y;
3095 const int b_width = s->b_width << s->block_max_depth;
3096 const int b_height= s->b_height << s->block_max_depth;
3097 const int b_stride= b_width;
3101 RangeCoder r = s->c;
3102 uint8_t state[sizeof(s->block_state)];
3103 memcpy(state, s->block_state, sizeof(s->block_state));
3104 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3105 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3106 encode_q_branch(s, 0, mb_x, mb_y);
3108 memcpy(s->block_state, state, sizeof(s->block_state));
// At most 25 passes over every block of the scaled grid.
3111 for(pass=0; pass<25; pass++){
3114 for(mb_y= 0; mb_y<b_height; mb_y++){
3115 for(mb_x= 0; mb_x<b_width; mb_x++){
3116 int dia_change, i, j, ref;
3117 int best_rd= INT_MAX, ref_rd;
3118 BlockNode backup, ref_b;
3119 const int index= mb_x + mb_y * b_stride;
3120 BlockNode *block= &s->block[index];
// The eight neighbouring blocks; NULL wherever the neighbour would fall
// outside the grid.
3121 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3122 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3123 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3124 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3125 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3126 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3127 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3128 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3129 const int b_w= (MB_SIZE >> s->block_max_depth);
3130 uint8_t obmc_edged[b_w*2][b_w*2];
// BLOCK_OPT is set on the block handled here and cleared on its neighbours
// further down when the block ends up different from its backup.
// NOTE(review): presumably the flag lets later passes skip settled blocks.
3132 if(pass && (block->type & BLOCK_OPT))
3134 block->type |= BLOCK_OPT;
// A generation counter of zero triggers a full clear of me_cache; the
// counter then advances by 1<<22.
3138 if(!s->me_cache_generation)
3139 memset(s->me_cache, 0, sizeof(s->me_cache));
3140 s->me_cache_generation += 1<<22;
// Local copy of the OBMC window for this block size. The loops below
// overwrite parts of the copy with sums of two of its own entries; the
// visible conditions tie this to the last column and the last row.
3145 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3147 for(y=0; y<b_w*2; y++)
3148 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3149 if(mb_x==b_stride-1)
3150 for(y=0; y<b_w*2; y++)
3151 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3153 for(x=0; x<b_w*2; x++)
3154 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3155 for(y=1; y<b_w; y++)
3156 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3158 if(mb_y==b_height-1){
3159 for(x=0; x<b_w*2; x++)
3160 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3161 for(y=b_w; y<b_w*2-1; y++)
3162 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3166 //skip stuff outside the picture
// For blocks on the grid border, pixels of input_picture are copied into
// current_picture over the parts of the 2*block_w window (anchored at sx, sy)
// that extend past the plane of size w x h.
3167 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3169 uint8_t *src= s-> input_picture.data[0];
3170 uint8_t *dst= s->current_picture.data[0];
3171 const int stride= s->current_picture.linesize[0];
3172 const int block_w= MB_SIZE >> s->block_max_depth;
3173 const int sx= block_w*mb_x - block_w/2;
3174 const int sy= block_w*mb_y - block_w/2;
3175 const int w= s->plane[0].width;
3176 const int h= s->plane[0].height;
3180 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3181 for(y=h; y<sy+block_w*2; y++)
3182 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3184 for(y=sy; y<sy+block_w*2; y++)
3185 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3187 if(sx+block_w*2 > w){
3188 for(y=sy; y<sy+block_w*2; y++)
3189 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3193 // intra(black) = neighbors' contribution to the current block
3195 color[i]= get_dc(s, mb_x, mb_y, i);
3197 // get previous score (cannot be cached due to OBMC)
// After the first pass a block flagged BLOCK_INTRA is re-scored with its
// stored colors; the inter re-score uses the stored vector.
3198 if(pass > 0 && (block->type&BLOCK_INTRA)){
3199 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3200 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3202 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
// Loop over the reference frames. The ref_scores test compares each
// reference against 3/2 of the score stored for ref_b.ref.
3206 for(ref=0; ref < s->ref_frames; ref++){
3207 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3208 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
// Candidate vectors: the one stored for this block, the zero vector, and
// the ones stored for the entries at -b_stride, -1, +1 and +b_stride.
3213 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3214 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3216 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3218 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3220 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3222 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3225 //FIXME avoid subpel interpol / round to nearest integer
// Search around the current vector with offsets (+-4*(i-j), +-4*j); the
// outer loop runs FFMAX(dia_size, 1) times. dia_change collects whether any
// candidate was accepted.
3228 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3230 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3231 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3232 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// Refinement over the eight positions at distance one from the current vector.
3239 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3242 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3244 //FIXME or try the standard 2 pass qpel or similar
// Store the vector reached for this reference back into ref_mvs.
3246 mvr[0][0]= block->mx;
3247 mvr[0][1]= block->my;
3248 if(ref_rd > best_rd){
// Also score the block using the colors obtained from get_dc().
3256 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3257 //FIXME RD style color selection
// The block changed relative to its backup: clear BLOCK_OPT on all existing
// neighbours.
3259 if(!same_block(block, &backup)){
3260 if(tb ) tb ->type &= ~BLOCK_OPT;
3261 if(lb ) lb ->type &= ~BLOCK_OPT;
3262 if(rb ) rb ->type &= ~BLOCK_OPT;
3263 if(bb ) bb ->type &= ~BLOCK_OPT;
3264 if(tlb) tlb->type &= ~BLOCK_OPT;
3265 if(trb) trb->type &= ~BLOCK_OPT;
3266 if(blb) blb->type &= ~BLOCK_OPT;
3267 if(brb) brb->type &= ~BLOCK_OPT;
3272 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// Only with block_max_depth == 1: walk the grid in steps of two and score
// each 2x2 group through get_4block_rd() and check_4block_inter().
3277 if(s->block_max_depth == 1){
3279 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3280 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3282 int best_rd, init_rd;
3283 const int index= mb_x + mb_y * b_stride;
3286 b[0]= &s->block[index];
3288 b[2]= b[0]+b_stride;
// Test whether the four blocks of the group are already identical.
3290 if(same_block(b[0], b[1]) &&
3291 same_block(b[0], b[2]) &&
3292 same_block(b[0], b[3]))
3295 if(!s->me_cache_generation)
3296 memset(s->me_cache, 0, sizeof(s->me_cache));
3297 s->me_cache_generation += 1<<22;
3299 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3301 //FIXME more multiref search?
// First candidate: rounded mean of the four vectors, with reference 0.
3302 check_4block_inter(s, mb_x, mb_y,
3303 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3304 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
// Further candidates: vector and reference of each block not flagged BLOCK_INTRA.
3307 if(!(b[i]->type&BLOCK_INTRA))
3308 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3310 if(init_rd != best_rd)
3314 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
// Quantizes the coefficients of subband b from src into dst.
// qlog is s->qlog + b->qlog clipped to [0, QROOT*16]; qmul is taken from the
// qexp table and shifted by (qlog>>QSHIFT) + ENCODER_EXTRA_BITS.
// Both buffers are addressed as x + y*stride.
3318 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3319 const int level= b->level;
3320 const int w= b->width;
3321 const int h= b->height;
3322 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3323 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3324 int x,y, thres1, thres2;
// With LOSSLESS_QLOG the coefficients are copied through unchanged.
3327 if(s->qlog == LOSSLESS_QLOG){
3330 dst[x + y*stride]= src[x + y*stride];
// A nonzero bias argument selects a bias of 0, otherwise 3*qmul/8 is used.
3334 bias= bias ? 0 : (3*qmul)>>3;
3335 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// Variant dividing by qmul without adding the bias.
3341 int i= src[x + y*stride];
3343 if((unsigned)(i+thres1) > thres2){
3346 i/= qmul; //FIXME optimize
3347 dst[x + y*stride]= i;
3351 i/= qmul; //FIXME optimize
3352 dst[x + y*stride]= -i;
3355 dst[x + y*stride]= 0;
// Variant adding the bias before dividing by qmul.
3361 int i= src[x + y*stride];
3363 if((unsigned)(i+thres1) > thres2){
3366 i= (i + bias) / qmul; //FIXME optimize
3367 dst[x + y*stride]= i;
3371 i= (i + bias) / qmul; //FIXME optimize
3372 dst[x + y*stride]= -i;
3375 dst[x + y*stride]= 0;
3379 if(level+1 == s->spatial_decomposition_count){
3380 // STOP_TIMER("quantize")
// Dequantizes rows start_y (inclusive) to end_y (exclusive) of subband b in
// place, working on lines fetched from the slice buffer sb.
// qmul comes from the qexp table for the clipped sum s->qlog + b->qlog;
// qadd is (s->qbias*qmul) >> QBIAS_SHIFT.
3384 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3385 const int w= b->width;
3386 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3387 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3388 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
// Nothing to undo when the frame uses LOSSLESS_QLOG.
3392 if(s->qlog == LOSSLESS_QLOG) return;
3394 for(y=start_y; y<end_y; y++){
3395 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Row y of the band sits at slice buffer line y*stride_line + buf_y_offset,
// starting buf_x_offset elements into that line.
3396 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Two reconstruction forms: one negates both the input and the result,
// the other applies i*qmul + qadd directly.
3400 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3402 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3406 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3407 STOP_TIMER("dquant")
// Dequantizes subband b in place inside src, addressed as x + y*stride.
// qmul comes from the qexp table for the clipped sum s->qlog + b->qlog;
// qadd is (s->qbias*qmul) >> QBIAS_SHIFT.
3411 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3412 const int w= b->width;
3413 const int h= b->height;
3414 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3415 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3416 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
// Nothing to undo when the frame uses LOSSLESS_QLOG.
3420 if(s->qlog == LOSSLESS_QLOG) return;
3424 int i= src[x + y*stride];
// Two reconstruction forms: one negates both the input and the result,
// the other applies i*qmul + qadd directly.
3426 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3428 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3432 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3433 STOP_TIMER("dquant")
// Replaces each coefficient of subband b by its difference from a prediction
// built from already-visited neighbours, in place.
// The scan runs from the last row to the first and from the last column to
// the first, so the left, top and top-right values read here are still the
// original ones.
3437 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3438 const int w= b->width;
3439 const int h= b->height;
3442 for(y=h-1; y>=0; y--){
3443 for(x=w-1; x>=0; x--){
3444 int i= x + y*stride;
// Predictor: median of left, top and top-right; plain left value otherwise.
3448 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3449 else src[i] -= src[i - 1];
// Predictor: median of left, top and (left + top - top-left); plain left
// value on the first row.
3451 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3452 else src[i] -= src[i - 1];
// Predictor: the value directly above, when there is a row above.
3455 if(y) src[i] -= src[i - stride];
// Adds the neighbour-based prediction back onto the coefficients of rows
// start_y (inclusive) to end_y (exclusive) of subband b, working on lines
// fetched from the slice buffer sb. The predictors mirror the ones that
// decorrelate() subtracts.
3461 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3462 const int w= b->width;
3467 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// Line of the row just above start_y.
3471 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3473 for(y=start_y; y<end_y; y++){
3475 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3476 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Predictor: median of left, top and top-right; plain left value otherwise.
3480 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3481 else line[x] += line[x - 1];
// Predictor: median of left, top and (left + top - top-left); plain left
// value on the first row.
3483 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3484 else line[x] += line[x - 1];
// Predictor: the value directly above, when there is a row above.
3487 if(y) line[x] += prev[x];
3492 // STOP_TIMER("correlate")
// Adds the neighbour-based prediction back onto the coefficients of subband
// b, in place inside src (addressed as x + y*stride). The predictors mirror
// the ones that decorrelate() subtracts.
3495 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3496 const int w= b->width;
3497 const int h= b->height;
3502 int i= x + y*stride;
// Predictor: median of left, top and top-right; plain left value otherwise.
3506 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3507 else src[i] += src[i - 1];
// Predictor: median of left, top and (left + top - top-left); plain left
// value on the first row.
3509 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3510 else src[i] += src[i - 1];
// Predictor: the value directly above, when there is a row above.
3513 if(y) src[i] += src[i - stride];
// Writes the frame header into the range coder s->c.
// The keyframe flag is coded with a private state array initialised to
// MID_STATE; all other fields use s->header_state.
// Several fields are coded as differences against last_* values, which are
// refreshed at the end of this function.
3519 static void encode_header(SnowContext *s){
3520 int plane_index, level, orientation, i;
3523 memset(kstate, MID_STATE, sizeof(kstate));
3525 put_rac(&s->c, kstate, s->keyframe);
// On a keyframe, or when always_reset is set, the remembered last_* values
// are reset, including the last_hcoeff arrays of planes 0 and 1.
3526 if(s->keyframe || s->always_reset){
3528 s->last_spatial_decomposition_type=
3532 s->last_block_max_depth= 0;
3533 for(plane_index=0; plane_index<2; plane_index++){
3534 Plane *p= &s->plane[plane_index];
3537 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
// Stream-level parameters.
3541 put_symbol(&s->c, s->header_state, s->version, 0);
3542 put_rac(&s->c, s->header_state, s->always_reset);
3543 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3544 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3545 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3546 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3547 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3548 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3549 put_rac(&s->c, s->header_state, s->spatial_scalability);
3550 // put_rac(&s->c, s->header_state, s->rate_scalability);
// The reference frame count is stored minus one.
3551 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// Per-band qlog for planes 0 and 1 only; orientation 2 is not written.
3553 for(plane_index=0; plane_index<2; plane_index++){
3554 for(level=0; level<s->spatial_decomposition_count; level++){
3555 for(orientation=level ? 1:0; orientation<4; orientation++){
3556 if(orientation==2) continue;
3557 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// update_mc becomes nonzero when htaps, diag_mc or hcoeff of plane 0 or 1
// differ from the values remembered in last_*.
3565 for(plane_index=0; plane_index<2; plane_index++){
3566 Plane *p= &s->plane[plane_index];
3567 update_mc |= p->last_htaps != p->htaps;
3568 update_mc |= p->last_diag_mc != p->diag_mc;
3569 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
// The update_mc flag itself is only written when always_reset is off.
3571 if(!s->always_reset)
3572 put_rac(&s->c, s->header_state, update_mc);
// Filter description for planes 0 and 1: diag_mc, htaps/2-1, then the
// absolute values of hcoeff[htaps/2] down to hcoeff[1].
3574 for(plane_index=0; plane_index<2; plane_index++){
3575 Plane *p= &s->plane[plane_index];
3576 put_rac(&s->c, s->header_state, p->diag_mc);
3577 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3578 for(i= p->htaps/2; i; i--)
3579 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3581 p->last_diag_mc= p->diag_mc;
3582 p->last_htaps= p->htaps;
3583 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
// Fields coded as signed differences against the remembered values.
3588 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3589 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3590 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3591 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3592 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// Remember the values just written for the next header.
3594 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3595 s->last_qlog = s->qlog;
3596 s->last_qbias = s->qbias;
3597 s->last_mv_scale = s->mv_scale;
3598 s->last_block_max_depth = s->block_max_depth;
// Reads the frame header from the range coder s->c into the context.
// The keyframe flag is decoded with a private state array initialised to
// MID_STATE; all other fields use s->header_state.
// Several fields are accumulated as signed differences onto the values
// already held in the context.
3601 static int decode_header(SnowContext *s){
3602 int plane_index, level, orientation;
3605 memset(kstate, MID_STATE, sizeof(kstate));
3607 s->keyframe= get_rac(&s->c, kstate);
// On a keyframe, or when always_reset is set, the delta-coded fields are reset.
3608 if(s->keyframe || s->always_reset){
3610 s->spatial_decomposition_type=
3614 s->block_max_depth= 0;
3617 s->version= get_symbol(&s->c, s->header_state, 0);
3619 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
// Stream-level parameters.
3622 s->always_reset= get_rac(&s->c, s->header_state);
3623 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3624 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3625 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3626 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3627 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3628 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3629 s->spatial_scalability= get_rac(&s->c, s->header_state);
3630 // s->rate_scalability= get_rac(&s->c, s->header_state);
// The reference frame count is stored minus one.
3631 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// Per-band qlog: plane 2 copies plane 1, orientation 2 copies orientation 1
// of the same level, and everything else is read from the stream.
3633 for(plane_index=0; plane_index<3; plane_index++){
3634 for(level=0; level<s->spatial_decomposition_count; level++){
3635 for(orientation=level ? 1:0; orientation<4; orientation++){
3637 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3638 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3639 else q= get_symbol(&s->c, s->header_state, 1);
3640 s->plane[plane_index].band[level][orientation].qlog= q;
// Filter parameters are read when always_reset is set or when the decoded
// update flag is set.
3647 if(s->always_reset || get_rac(&s->c, s->header_state)){
3648 for(plane_index=0; plane_index<2; plane_index++){
3649 int htaps, i, sum=0, absum=0;
3650 Plane *p= &s->plane[plane_index];
3651 p->diag_mc= get_rac(&s->c, s->header_state);
// htaps is stored as htaps/2-1 and is range-checked against HTAPS_MAX.
3652 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3653 if((unsigned)htaps > HTAPS_MAX || htaps==0)
// Coefficients are stored as magnitudes; the sign is negative for odd i
// and positive for even i.
3656 for(i= htaps/2; i; i--){
3657 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3658 sum += p->hcoeff[i];
// hcoeff[0] is not transmitted; it is derived as 32 minus the accumulated sum.
3660 p->hcoeff[0]= 32-sum;
// Plane 2 reuses the filter parameters of plane 1.
3662 s->plane[2].diag_mc= s->plane[1].diag_mc;
3663 s->plane[2].htaps = s->plane[1].htaps;
3664 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// Signed differences accumulated onto the current values.
3668 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3669 if(s->spatial_decomposition_type > 1){
3670 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3674 s->qlog += get_symbol(&s->c, s->header_state, 1);
3675 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3676 s->qbias += get_symbol(&s->c, s->header_state, 1);
3677 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// block_max_depth must end up as 0 or 1; otherwise it is reset to 0.
// NOTE(review): the message below says too large even for negative values.
3678 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3679 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3680 s->block_max_depth= 0;
// Runs QROOT steps, multiplying v by 2^(1/QROOT) at each step.
// NOTE(review): presumably this fills the qexp table used by the quantizer; confirm.
3687 static void init_qexp(void){
3691 for(i=0; i<QROOT; i++){
3693 v *= pow(2, 1.0 / QROOT);
// Initialisation shared by the codec entry points: sets up the dsp context,
// installs motion compensation function pointers, allocates the two
// width*height wavelet buffers, fills the scale_mv_ref table and requests a
// buffer for mconly_picture.
// NOTE(review): the allocation and get_buffer results are not checked here.
3697 static int common_init(AVCodecContext *avctx){
3698 SnowContext *s = avctx->priv_data;
3704 dsputil_init(&s->dsp, avctx);
3707 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3708 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3709 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3710 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3711 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3712 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3731 #define mcfh(dx,dy)\
3732 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3733 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3734 mc_block_hpel ## dx ## dy ## 16;\
3735 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3736 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3737 mc_block_hpel ## dx ## dy ## 8;
3747 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3749 width= s->avctx->width;
3750 height= s->avctx->height;
// One zero-initialised element per pixel in each buffer.
3752 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3753 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] holds 256*(i+1)/(j+1) using integer division.
3755 for(i=0; i<MAX_REF_FRAMES; i++)
3756 for(j=0; j<MAX_REF_FRAMES; j++)
3757 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3759 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
// Sets up plane dimensions and the geometry of every subband once the
// header fields (decomposition count, chroma shifts) are known.
// Also reallocates the x_coeff array of each band.
3764 static int common_init_after_header(AVCodecContext *avctx){
3765 SnowContext *s = avctx->priv_data;
3766 int plane_index, level, orientation;
3768 for(plane_index=0; plane_index<3; plane_index++){
3769 int w= s->avctx->width;
3770 int h= s->avctx->height;
// Dimensions reduced by the chroma shifts.
3773 w>>= s->chroma_h_shift;
3774 h>>= s->chroma_v_shift;
3776 s->plane[plane_index].width = w;
3777 s->plane[plane_index].height= h;
3779 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Levels are walked from the highest index down to 0; orientation 0 exists
// only at level 0.
3780 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3781 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3782 SubBand *b= &s->plane[plane_index].band[level][orientation];
3784 b->buf= s->spatial_dwt_buffer;
// The stride grows by a factor of two per decomposition step; width and
// height are halved, rounding up or down depending on the orientation bits.
3786 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3787 b->width = (w + !(orientation&1))>>1;
3788 b->height= (h + !(orientation>1))>>1;
// Line step and offsets of the band inside the slice buffer.
3790 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3791 b->buf_x_offset = 0;
3792 b->buf_y_offset = 0;
3796 b->buf_x_offset = (w+1)>>1;
3799 b->buf += b->stride>>1;
3800 b->buf_y_offset = b->stride_line >> 1;
// ibuf points at the same element offset inside spatial_idwt_buffer that
// buf has inside spatial_dwt_buffer.
3802 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3805 b->parent= &s->plane[plane_index].band[level-1][orientation];
3806 //FIXME avoid this realloc
3807 av_freep(&b->x_coeff);
3808 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// Converts a qscale value into a qlog value:
// rint(QROOT * log2(qscale / FF_QP2LAMBDA)) plus the constant offset 61*QROOT/8.
// The division is carried out in float before the logarithm is taken.
3818 static int qscale2qlog(int qscale){
3819 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3820 + 61*QROOT/8; //<64 >60
// Single-pass rate control step: derives a complexity figure from the luma
// subbands, hands it to the rate controller and updates pict->quality,
// s->lambda and s->qlog from the returned qscale.
3823 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3825 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3826 * FIXME we know exact mv bits at this point,
3827 * but ratecontrol isn't set up to include them. */
3828 uint32_t coef_sum= 0;
3829 int level, orientation, delta_qlog;
// Only plane 0 is examined.
3831 for(level=0; level<s->spatial_decomposition_count; level++){
3832 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3833 SubBand *b= &s->plane[0].band[level][orientation];
3834 IDWTELEM *buf= b->ibuf;
3835 const int w= b->width;
3836 const int h= b->height;
3837 const int stride= b->stride;
// The weight uses a fixed base of 2*QROOT plus the band qlog, not s->qlog.
3838 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3839 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3840 const int qdiv= (1<<16)/qmul;
3842 //FIXME this is ugly
// Copy the band from buf into ibuf before decorrelating it there.
3845 buf[x+y*stride]= b->buf[x+y*stride];
3847 decorrelate(s, b, buf, stride, 1, 0);
// Accumulate the magnitudes scaled by qdiv in 16.16 fixed point.
3850 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3854 /* ugly, ratecontrol just takes a sqrt again */
3855 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3856 assert(coef_sum < INT_MAX);
// The figure is reported as mb_var_sum for I_TYPE pictures and as
// mc_mb_var_sum otherwise; the other field is zeroed.
3858 if(pict->pict_type == I_TYPE){
3859 s->m.current_picture.mb_var_sum= coef_sum;
3860 s->m.current_picture.mc_mb_var_sum= 0;
3862 s->m.current_picture.mc_mb_var_sum= coef_sum;
3863 s->m.current_picture.mb_var_sum= 0;
3866 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3867 if (pict->quality < 0)
3869 s->lambda= pict->quality * 3/2;
// s->qlog ends up equal to qscale2qlog(pict->quality).
3870 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3871 s->qlog+= delta_qlog;
// Derives b->qlog for every subband of plane p from the response of the
// inverse transform to a single coefficient placed in that band.
// Uses s->spatial_idwt_buffer as scratch space and overwrites it.
3875 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3876 int width = p->width;
3877 int height= p->height;
3878 int level, orientation, x, y;
3880 for(level=0; level<s->spatial_decomposition_count; level++){
3881 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3882 SubBand *b= &p->band[level][orientation];
3883 IDWTELEM *ibuf= b->ibuf;
// Clear the buffer, set the coefficient at the centre of the band to
// 256*16 and run the inverse transform over the whole plane.
3886 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3887 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3888 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3889 for(y=0; y<height; y++){
3890 for(x=0; x<width; x++){
3891 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// qlog is the logarithm to base 2^(1/QROOT) of 352256/sqrt(error), plus 0.5
// before truncation to int.
3896 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3897 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
// Encoder initialisation: validates the configuration, sets the fixed
// stream parameters, allocates motion estimation scratch memory, starts
// rate control where required and computes the per-band weights.
// NOTE(review): none of the av_mallocz results in this function are checked.
3902 static int encode_init(AVCodecContext *avctx)
3904 SnowContext *s = avctx->priv_data;
// The encoder is only usable when compliance is set to experimental or lower.
3907 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3908 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3909 "use vstrict=-2 / -strict -2 to use it anyway\n");
// DWT_97 together with fixed qscale and global_quality 0 is reported as an error.
3913 if(avctx->prediction_method == DWT_97
3914 && (avctx->flags & CODEC_FLAG_QSCALE)
3915 && avctx->global_quality == 0){
3916 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3920 s->spatial_decomposition_count= 5;
3921 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3923 s->chroma_h_shift= 1; //FIXME XXX
3924 s->chroma_v_shift= 1;
// mv_scale is 2 with CODEC_FLAG_QPEL and 4 without; block_max_depth is 1
// with CODEC_FLAG_4MV and 0 without.
3926 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3927 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// Default interpolation filter for all three planes.
3929 for(plane_index=0; plane_index<3; plane_index++){
3930 s->plane[plane_index].diag_mc= 1;
3931 s->plane[plane_index].htaps= 6;
3932 s->plane[plane_index].hcoeff[0]= 40;
3933 s->plane[plane_index].hcoeff[1]= -10;
3934 s->plane[plane_index].hcoeff[2]= 2;
3935 s->plane[plane_index].fast_mc= 1;
3939 common_init_after_header(avctx);
3945 s->m.flags = avctx->flags;
3946 s->m.bit_rate= avctx->bit_rate;
// Scratch memory for the motion estimation code reached through s->m.
3948 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3949 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3950 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3951 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3952 h263_encode_init(&s->m); //mv_penalty
// Reference count clamped to the range 1..MAX_REF_FRAMES.
3954 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3956 if(avctx->flags&CODEC_FLAG_PASS1){
3957 if(!avctx->stats_out)
3958 avctx->stats_out = av_mallocz(256);
// Rate control is initialised for pass 2 and whenever qscale is not fixed.
3960 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3961 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set only when neither fixed qscale nor pass 2 is requested.
3964 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3966 for(plane_index=0; plane_index<3; plane_index++){
3967 calculate_vissual_weight(s, &s->plane[plane_index]);
3971 avctx->coded_frame= &s->current_picture;
// Only PIX_FMT_YUV420P is an active case label below.
3972 switch(avctx->pix_fmt){
3973 // case PIX_FMT_YUV444P:
3974 // case PIX_FMT_YUV422P:
3975 case PIX_FMT_YUV420P:
3977 // case PIX_FMT_YUV411P:
3978 // case PIX_FMT_YUV410P:
3979 s->colorspace_type= 0;
3981 /* case PIX_FMT_RGB32:
3985 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3988 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3989 s->chroma_h_shift= 1;
3990 s->chroma_v_shift= 1;
3992 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3993 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3995 s->avctx->get_buffer(s->avctx, &s->input_picture);
// ME_ITER keeps one vector and one score per finest-level block for every
// reference frame.
3997 if(s->avctx->me_method == ME_ITER){
3999 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4000 for(i=0; i<s->max_ref_frames; i++){
4001 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4002 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Builds interpolated copies of frame and stores them in halfpel[1..3][p]
// for each plane p.
// Each copy is allocated with ls*(h+2*EDGE_WIDTH) bytes and the stored
// pointer is advanced by EDGE_WIDTH*(1+ls) from the allocation start.
// NOTE(review): the av_malloc results are not checked before the offset is applied.
4009 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4012 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4016 int w= s->avctx->width >>is_chroma;
4017 int h= s->avctx->height >>is_chroma;
4018 int ls= frame->linesize[p];
4019 uint8_t *src= frame->data[p];
4021 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4022 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4023 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// Six-tap filter with weights 1, -5, 20, 20, -5, 1, rounding term 16 and a
// shift by 5, applied along a row.
4030 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// The same filter applied along a column (steps of ls).
4037 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// Column filter again for the third copy.
// NOTE(review): confirm which buffer src refers to at this point.
4045 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// Prepares the picture buffers for coding a new frame: pads the previous
// reconstruction, rotates the reference list and requests a fresh buffer
// for current_picture.
4053 static int frame_start(SnowContext *s){
4055 int w= s->avctx->width; //FIXME round up to x16 ?
4056 int h= s->avctx->height;
// Pad the edges of the existing reconstruction: EDGE_WIDTH for plane 0 and
// half of that, on halved dimensions, for planes 1 and 2.
4058 if(s->current_picture.data[0]){
4059 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4060 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4061 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Rotate the references: the oldest entry is saved in tmp, the others move
// up by one slot, and the halfpel planes are shifted the same way.
4064 tmp= s->last_picture[s->max_ref_frames-1];
4065 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4066 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4067 #ifdef USE_HALFPEL_PLANE
4068 if(s->current_picture.data[0])
4069 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
// The previous reconstruction becomes reference 0; the saved oldest frame
// becomes the new current_picture.
4071 s->last_picture[0]= s->current_picture;
4072 s->current_picture= tmp;
// Walk the populated reference slots, testing whether the preceding slot
// holds a key frame.
4078 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4079 if(i && s->last_picture[i-1].key_frame)
4084 s->current_picture.reference= 1;
4085 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4086 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4090 s->current_picture.key_frame= s->keyframe;
// Encodes one picture (passed through data as an AVFrame) into buf using
// the range coder.
// The return value is whatever ff_rac_terminate() returns for the coder.
4095 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4096 SnowContext *s = avctx->priv_data;
4097 RangeCoder * const c= &s->c;
4098 AVFrame *pict = data;
4099 const int width= s->avctx->width;
4100 const int height= s->avctx->height;
4101 int level, orientation, plane_index, i, y;
4102 uint8_t rc_header_bak[sizeof(s->header_state)];
4103 uint8_t rc_block_bak[sizeof(s->block_state)];
4105 ff_init_range_encoder(c, buf, buf_size);
4106 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Copy the caller picture row by row into input_picture.
4110 for(y=0; y<(height>>shift); y++)
4111 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4112 &pict->data[i][y * pict->linesize[i]],
4115 s->new_picture = *pict;
4117 s->m.picture_number= avctx->frame_number;
// Pass 2 takes the picture type from the rate control entries and, unless
// qscale is fixed, asks the rate controller for the quality.
4118 if(avctx->flags&CODEC_FLAG_PASS2){
4120 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4121 s->keyframe= pict->pict_type==FF_I_TYPE;
4122 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4123 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4124 if (pict->quality < 0)
// Otherwise a keyframe is coded when gop_size is 0 or at every multiple of gop_size.
4128 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4130 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// Starting quality for the very first frame under single-pass rate control.
4133 if(s->pass1_rc && avctx->frame_number == 0)
4134 pict->quality= 2*FF_QP2LAMBDA;
4136 s->qlog= qscale2qlog(pict->quality);
4137 s->lambda = pict->quality * 3/2;
// A negative qlog, or quality 0 together with fixed qscale, selects LOSSLESS_QLOG.
4139 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4140 s->qlog= LOSSLESS_QLOG;
4142 }//else keep previous frame's qlog until after motion est
4146 s->m.current_picture_ptr= &s->m.current_picture;
// For P pictures, fill in the fields of s->m used for motion estimation.
4147 if(pict->pict_type == P_TYPE){
4148 int block_width = (width +15)>>4;
4149 int block_height= (height+15)>>4;
4150 int stride= s->current_picture.linesize[0];
4152 assert(s->current_picture.data[0]);
4153 assert(s->last_picture[0].data[0]);
4155 s->m.avctx= s->avctx;
4156 s->m.current_picture.data[0]= s->current_picture.data[0];
4157 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4158 s->m. new_picture.data[0]= s-> input_picture.data[0];
4159 s->m. last_picture_ptr= &s->m. last_picture;
4161 s->m. last_picture.linesize[0]=
4162 s->m. new_picture.linesize[0]=
4163 s->m.current_picture.linesize[0]= stride;
4164 s->m.uvlinesize= s->current_picture.linesize[1];
4166 s->m.height= height;
4167 s->m.mb_width = block_width;
4168 s->m.mb_height= block_height;
4169 s->m.mb_stride= s->m.mb_width+1;
4170 s->m.b8_stride= 2*s->m.mb_width+1;
4172 s->m.pict_type= pict->pict_type;
4173 s->m.me_method= s->avctx->me_method;
4174 s->m.me.scene_change_score=0;
4175 s->m.flags= s->avctx->flags;
4176 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4177 s->m.out_format= FMT_H263;
4178 s->m.unrestricted_mv= 1;
4180 s->m.lambda = s->lambda;
4181 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4182 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4184 s->m.dsp= s->dsp; //move
// Keep copies of the coder context states so they can be restored if the
// header and blocks are written again after rate control.
4190 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4191 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4196 s->m.pict_type = pict->pict_type;
4197 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit counts are taken from the bytestream position before and after
// encode_blocks().
4200 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4201 encode_blocks(s, 1);
4202 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4204 for(plane_index=0; plane_index<3; plane_index++){
4205 Plane *p= &s->plane[plane_index];
4209 // int bits= put_bits_count(&s->c.pb);
4211 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
// Load the plane into spatial_idwt_buffer scaled by FRAC_BITS, then call
// predict_plane() with last argument 0.
4213 if(pict->data[plane_index]) //FIXME gray hack
4216 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4219 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
// Scene change on a P picture outside pass 2: restart the coder and code
// the picture as FF_I_TYPE.
4222 && pict->pict_type == P_TYPE
4223 && !(avctx->flags&CODEC_FLAG_PASS2)
4224 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4225 ff_init_range_encoder(c, buf, buf_size);
4226 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4227 pict->pict_type= FF_I_TYPE;
4229 s->current_picture.key_frame=1;
// Move the residual into spatial_dwt_buffer: rounded back down by FRAC_BITS
// in lossless mode, scaled up by ENCODER_EXTRA_BITS otherwise.
4233 if(s->qlog == LOSSLESS_QLOG){
4236 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4242 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4247 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Single-pass rate control runs once, on plane 0. Afterwards the coder is
// restarted, the saved states are restored and the blocks are encoded again.
4249 if(s->pass1_rc && plane_index==0){
4250 int delta_qlog = ratecontrol_1pass(s, pict);
4251 if (delta_qlog <= INT_MIN)
4254 //reordering qlog in the bitstream would eliminate this reset
4255 ff_init_range_encoder(c, buf, buf_size);
4256 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4257 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4259 encode_blocks(s, 0);
// Per band: quantize, decorrelate, entropy code, then correlate again.
4263 for(level=0; level<s->spatial_decomposition_count; level++){
4264 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4265 SubBand *b= &p->band[level][orientation];
4267 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4269 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4270 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4271 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4273 correlate(s, b, b->ibuf, b->stride, 1, 0);
4276 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction: dequantize every band, inverse transform, then call
// predict_plane() with last argument 1.
4278 for(level=0; level<s->spatial_decomposition_count; level++){
4279 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4280 SubBand *b= &p->band[level][orientation];
4282 dequantize(s, b, b->ibuf, b->stride);
4286 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4287 if(s->qlog == LOSSLESS_QLOG){
4290 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4295 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4296 STOP_TIMER("pred-conv")}
// Alternative path: an I_TYPE picture is copied straight from pict into
// current_picture; otherwise the buffer is zeroed before predict_plane().
4299 if(pict->pict_type == I_TYPE){
4302 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4303 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4307 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4308 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// With CODEC_FLAG_PSNR the per-plane error between reconstruction and
// input is stored.
4311 if(s->avctx->flags&CODEC_FLAG_PSNR){
4314 if(pict->data[plane_index]) //FIXME gray hack
4317 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4321 s->avctx->error[plane_index] += error;
4322 s->current_picture.error[plane_index] = error;
// Release the oldest reference and free its halfpel planes; the pointer
// offset applied at allocation time is undone before av_free().
4326 if(s->last_picture[s->max_ref_frames-1].data[0]){
4327 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4329 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4330 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
// Bookkeeping for the coded frame and for rate control.
4333 s->current_picture.coded_picture_number = avctx->frame_number;
4334 s->current_picture.pict_type = pict->pict_type;
4335 s->current_picture.quality = pict->quality;
4336 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4337 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4338 s->m.current_picture.display_picture_number =
4339 s->m.current_picture.coded_picture_number = avctx->frame_number;
4340 s->m.current_picture.quality = pict->quality;
4341 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4343 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4345 if(avctx->flags&CODEC_FLAG_PASS1)
4346 ff_write_pass1_stats(&s->m);
4347 s->m.last_pict_type = s->m.pict_type;
4348 avctx->frame_bits = s->m.frame_bits;
4349 avctx->mv_bits = s->m.mv_bits;
4350 avctx->misc_bits = s->m.misc_bits;
4351 avctx->p_tex_bits = s->m.p_tex_bits;
4355 return ff_rac_terminate(c);
/*
 * Release the buffers held by a SnowContext.
 *
 * Frees the DWT and inverse-DWT work buffers, the motion-estimation scratch
 * buffers kept in s->m, the block array, the per-reference ref_mvs/ref_scores
 * tables, any last_picture frame that still has data, and the x_coeff array
 * of every subband of the three planes.
 *
 * s  context whose buffers are released; the pointers freed through
 *    av_freep() are passed by address so they can be reset by the callee.
 *
 * NOTE(review): the embedded line numbers skip in places (e.g. 4375 -> 4378,
 * 4381 -> 4383), so short lines such as closing braces are missing from this
 * listing.
 */
4358 static void common_end(SnowContext *s){
4359 int plane_index, level, orientation, i;
// transform work buffers
4361 av_freep(&s->spatial_dwt_buffer);
4362 av_freep(&s->spatial_idwt_buffer);
// motion-estimation scratch areas stored in the embedded s->m context
4364 av_freep(&s->m.me.scratchpad);
4365 av_freep(&s->m.me.map);
4366 av_freep(&s->m.me.score_map);
4367 av_freep(&s->m.obmc_scratchpad);
4369 av_freep(&s->block);
// per-reference tables; a reference frame is handed back only if it holds data
4371 for(i=0; i<MAX_REF_FRAMES; i++){
4372 av_freep(&s->ref_mvs[i]);
4373 av_freep(&s->ref_scores[i]);
4374 if(s->last_picture[i].data[0])
4375 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4378 for(plane_index=0; plane_index<3; plane_index++){
// Walk every possible decomposition level rather than only the current
// s->spatial_decomposition_count: decode_frame() notes that the count can
// change between frames, so x_coeff arrays set up under a larger count
// would otherwise never be freed.
// NOTE(review): assumes band[] has MAX_DECOMPOSITIONS rows and that
// never-allocated x_coeff pointers are NULL -- confirm against the
// Plane/SubBand declarations.
4379 for(level=MAX_DECOMPOSITIONS-1; level>=0; level--){
// level 0 also owns orientation 0; higher levels start at orientation 1
4380 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4381 SubBand *b= &s->plane[plane_index].band[level][orientation];
4383 av_freep(&b->x_coeff);
/*
 * Encoder teardown callback.
 *
 * The visible body fetches the SnowContext from avctx->priv_data and frees
 * avctx->stats_out.
 *
 * NOTE(review): the embedded line numbers jump from 4391 to 4394 and stop
 * before the next definition at 4399, so at least one statement and the
 * return are missing from this listing -- confirm against the full file.
 */
4389 static int encode_end(AVCodecContext *avctx)
4391 SnowContext *s = avctx->priv_data;
4394 av_free(avctx->stats_out);
/*
 * Decoder init callback.
 *
 * The visible body fetches the SnowContext from avctx->priv_data and sets the
 * output pixel format to PIX_FMT_YUV420P.
 *
 * NOTE(review): embedded line numbers 4404-4409 are absent from this listing,
 * so further initialisation and the return statement cannot be seen here.
 */
4399 static int decode_init(AVCodecContext *avctx)
4401 SnowContext *s = avctx->priv_data;
4403 avctx->pix_fmt= PIX_FMT_YUV420P;
/*
 * Decode one coded Snow frame.
 *
 * avctx      codec context; the SnowContext is taken from avctx->priv_data
 * data       output frame, written as an AVFrame: a copy of
 *            s->current_picture, or of s->mconly_picture when
 *            avctx->debug has bit 2048 set
 * data_size  set to sizeof(AVFrame)
 * buf        coded input, handed to the range decoder
 * buf_size   size of buf as passed to the range decoder
 *
 * Visible flow: initialise the range decoder, (re)create the slice buffer,
 * then for each of the three planes unpack the subband coefficients and run
 * a row-by-row pipeline of subband slice decoding, inverse DWT, prediction
 * and slice release. Finally the oldest reference frame is released and the
 * output frame is filled in.
 *
 * NOTE(review): the embedded line numbers skip throughout this function, so
 * short lines (several declarations, loop headers, closing braces, the
 * return statement) are missing from this listing. bytes_read is computed at
 * the end and is presumably the return value -- confirm against the full file.
 */
4410 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4411 SnowContext *s = avctx->priv_data;
4412 RangeCoder * const c= &s->c;
4414 AVFrame *picture = data;
4415 int level, orientation, plane_index, i;
// set up the range decoder over the input packet
4417 ff_init_range_decoder(c, buf, buf_size);
4418 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4420 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4422 common_init_after_header(avctx);
4424 // realloc slice buffer for the case that spatial_decomposition_count changed
4425 slice_buffer_destroy(&s->sb);
4426 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
// fast_mc is only enabled for the diagonal 6-tap filter with these coefficients
// (the rest of the condition is on lines missing from this listing)
4428 for(plane_index=0; plane_index<3; plane_index++){
4429 Plane *p= &s->plane[plane_index];
4430 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4431 && p->hcoeff[1]==-10
4435 if(!s->block) alloc_blocks(s);
4438 //keyframe flag duplication mess FIXME
4439 if(avctx->debug&FF_DEBUG_PICT_INFO)
4440 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
// per-plane decoding
4444 for(plane_index=0; plane_index<3; plane_index++){
4445 Plane *p= &s->plane[plane_index];
4449 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug bit 2048: produce a prediction-only picture. The prediction is added
// onto spatial_idwt_buffer, so that is the buffer that has to be zeroed first
// (the original cleared spatial_dwt_buffer with sizeof(DWTELEM), leaving the
// buffer actually used untouched). This matches the encoder path, which
// zeroes spatial_idwt_buffer with sizeof(IDWTELEM) before the same call.
4451 if(s->avctx->debug&2048){
4452 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4453 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// copy the predicted samples from current_picture into mconly_picture
4457 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4458 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// unpack the coefficients of every subband (level 0 also has orientation 0)
4464 for(level=0; level<s->spatial_decomposition_count; level++){
4465 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4466 SubBand *b= &p->band[level][orientation];
4467 unpack_coeffs(s, b, b->parent, orientation);
4470 STOP_TIMER("unpack coeffs");
// block geometry: chroma planes (plane_index != 0) use half the block size
4474 const int mb_h= s->b_height << s->block_max_depth;
4475 const int block_size = MB_SIZE >> s->block_max_depth;
4476 const int block_w = plane_index ? block_size/2 : block_size;
4478 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4483 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// one iteration per block row (note the inclusive upper bound)
4484 for(mb_y=0; mb_y<=mb_h; mb_y++){
4486 int slice_starty = block_w*mb_y;
4487 int slice_h = block_w*(mb_y+1);
// unless this is a keyframe (or debug bit 512 is set) the slice window is
// moved up by half a block
4488 if (!(s->keyframe || s->avctx->debug&512)){
4489 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4490 slice_h -= (block_w >> 1);
// decode the rows of each subband that this slice needs
4495 for(level=0; level<s->spatial_decomposition_count; level++){
4496 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4497 SubBand *b= &p->band[level][orientation];
4500 int our_mb_start = mb_y;
4501 int our_mb_end = (mb_y + 1);
// map the block-row range to subband rows: scale down by the number of
// remaining decomposition levels, then add a margin ("extra" is declared on a
// line missing from this listing)
4503 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4504 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4505 if (!(s->keyframe || s->avctx->debug&512)){
4506 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4507 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// never run past the subband itself
4509 start_y = FFMIN(b->height, start_y);
4510 end_y = FFMIN(b->height, end_y);
4512 if (start_y != end_y){
// orientation 0 (band[0][0]) is decoded one row ahead, then correlated and
// dequantized; the handling of the other orientations is on lines partly
// missing from this listing
4513 if (orientation == 0){
4514 SubBand * correlate_band = &p->band[0][0];
4515 int correlate_end_y = FFMIN(b->height, end_y + 1);
4516 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4517 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4518 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4519 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4522 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4526 STOP_TIMER("decode_subband_slice");
// inverse DWT, advanced 4 lines at a time up to the end of this slice
4530 for(; yd<slice_h; yd+=4){
4531 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4533 STOP_TIMER("idwt slice");}
// lossless mode: scale the reconstructed lines up by FRAC_BITS
4536 if(s->qlog == LOSSLESS_QLOG){
4537 for(; yq<slice_h && yq<h; yq++){
4538 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4540 line[x] <<= FRAC_BITS;
// run prediction for this block row on the slice buffer
4545 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// release the slice-buffer lines of this slice, clamped to the plane height
4547 y = FFMIN(p->height, slice_starty);
4548 end_y = FFMIN(p->height, slice_h);
4550 slice_buffer_release(&s->sb, y++);
4553 slice_buffer_flush(&s->sb);
4555 STOP_TIMER("idwt + predict_slices")}
// drop the oldest reference frame together with its halfpel planes
4560 if(s->last_picture[s->max_ref_frames-1].data[0]){
4561 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4563 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4564 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
// output: the decoded picture, or the prediction-only picture for debug bit 2048
4567 if(!(s->avctx->debug&2048))
4568 *picture= s->current_picture;
4570 *picture= s->mconly_picture;
4572 *data_size = sizeof(AVFrame);
// number of input bytes consumed by the range decoder
4574 bytes_read= c->bytestream - c->bytestream_start;
4575 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/*
 * Decoder teardown callback.
 *
 * The visible body fetches the SnowContext from avctx->priv_data and destroys
 * the slice buffer s->sb.
 *
 * NOTE(review): embedded line numbers after 4584 are absent up to the next
 * definition at 4591, so further cleanup and the return statement cannot be
 * seen in this listing.
 */
4580 static int decode_end(AVCodecContext *avctx)
4582 SnowContext *s = avctx->priv_data;
4584 slice_buffer_destroy(&s->sb);
/*
 * Codec descriptor for the Snow decoder.
 *
 * NOTE(review): this is a positional initializer and most of its fields are
 * on lines missing from this listing (embedded numbers 4592-4594, 4596-4599);
 * only the two entries below are visible.
 */
4591 AVCodec snow_decoder = {
// size of the per-instance context -- presumably the priv_data size field; confirm against the AVCodec declaration
4595 sizeof(SnowContext),
// evaluates to 0: both flags are commented out -- presumably the capabilities field; confirm against the AVCodec declaration
4600 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4604 #ifdef CONFIG_SNOW_ENCODER
/*
 * Codec descriptor for the Snow encoder; the line above this definition
 * guards it with CONFIG_SNOW_ENCODER.
 *
 * NOTE(review): this is a positional initializer and most of its fields are
 * on lines missing from this listing; only the context-size entry is visible.
 */
4605 AVCodec snow_encoder = {
// size of the per-instance context -- presumably the priv_data size field; confirm against the AVCodec declaration
4609 sizeof(SnowContext),
/*
 * Self-test code for the wavelet transforms and the range coder.
 *
 * NOTE(review): the enclosing function header, the declarations of s, i, j,
 * g, error and the width/height definitions are on lines missing from this
 * listing, as are several loop headers and closing braces further down.
 */
// two copies of a width*height image: [0] is transformed, [1] keeps the reference
4626 int buffer[2][width*height];
4629 s.spatial_decomposition_count=6;
4630 s.spatial_decomposition_type=1;
// round trip with decomposition type 1: forward then inverse transform must
// reproduce the input exactly, any differing sample is printed
4632 printf("testing 5/3 DWT\n");
4633 for(i=0; i<width*height; i++)
4634 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4636 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4637 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4639 for(i=0; i<width*height; i++)
4640 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
// round trip with decomposition type 0: only differences larger than 20 are reported
4642 printf("testing 9/7 DWT\n");
4643 s.spatial_decomposition_type=0;
4644 for(i=0; i<width*height; i++)
4645 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4647 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4648 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4650 for(i=0; i<width*height; i++)
4651 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
// range coder round trip: encode a symbol derived from i for i in [-256,256)
// into buffer[0], then decode from the same memory and compare
4654 printf("testing AC coder\n");
4655 memset(s.header_state, 0, sizeof(s.header_state));
4656 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4657 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4659 for(i=-256; i<256; i++){
4661 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4662 STOP_TIMER("put_symbol")
4664 ff_rac_terminate(&s.c);
// the decoder starts from the same zeroed header_state as the encoder did
4666 memset(s.header_state, 0, sizeof(s.header_state));
4667 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4668 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4670 for(i=-256; i<256; i++){
4673 j= get_symbol(&s.c, s.header_state, 1);
4674 STOP_TIMER("get_symbol")
4675 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
// per-subband impulse response: put a single coefficient of 256*256 in the
// middle of one subband, run the inverse transform, and record a rounded
// square-root error figure per [level][orientation]
// (the accumulation into "error" is on lines missing from this listing)
4679 int level, orientation, x, y;
4680 int64_t errors[8][4];
4683 memset(errors, 0, sizeof(errors));
4684 s.spatial_decomposition_count=3;
4685 s.spatial_decomposition_type=0;
4686 for(level=0; level<s.spatial_decomposition_count; level++){
4687 for(orientation=level ? 1 : 0; orientation<4; orientation++){
// subband size and row stride at this level
4688 int w= width >> (s.spatial_decomposition_count-level);
4689 int h= height >> (s.spatial_decomposition_count-level);
4690 int stride= width << (s.spatial_decomposition_count-level);
4691 DWTELEM *buf= buffer[0];
// odd orientations start w samples to the right, orientations 2 and 3 half a stride further on
4694 if(orientation&1) buf+=w;
4695 if(orientation>1) buf+=stride>>1;
4697 memset(buffer[0], 0, sizeof(int)*width*height);
4698 buf[w/2 + h/2*stride]= 256*256;
4699 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
// for level 2 the 17x17 neighbourhood around the image centre is printed
4700 for(y=0; y<height; y++){
4701 for(x=0; x<width; x++){
4702 int64_t d= buffer[0][x + y*width];
4704 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4706 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4708 error= (int)(sqrt(error)+0.5);
4709 errors[level][orientation]= error;
// g tracks the greatest common divisor of the figures seen so far
4710 if(g) g=ff_gcd(g, error);
// print the figures, divided by g, as a C table
4714 printf("static int const visual_weight[][4]={\n");
4715 for(level=0; level<s.spatial_decomposition_count; level++){
4717 for(orientation=0; orientation<4; orientation++){
4718 printf("%8"PRId64",", errors[level][orientation]/g);
// second experiment: transform a 2x2-periodic pattern, overwrite coefficients
// with fixed values (169 and 64), invert, and print the centre of the result
// (the values of level/orientation and the loop around the two stores are on
// lines missing from this listing)
4726 int w= width >> (s.spatial_decomposition_count-level);
4727 int h= height >> (s.spatial_decomposition_count-level);
4728 int stride= width << (s.spatial_decomposition_count-level);
4729 DWTELEM *buf= buffer[0];
4735 memset(buffer[0], 0, sizeof(int)*width*height);
4737 for(y=0; y<height; y++){
4738 for(x=0; x<width; x++){
// the sample value depends only on the parity of x and y
4739 int tab[4]={0,2,3,1};
4740 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4743 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4747 buf[x + y*stride ]=169;
4748 buf[x + y*stride-w]=64;
4751 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4753 for(y=0; y<height; y++){
4754 for(x=0; x<width; x++){
4755 int64_t d= buffer[0][x + y*width];
4757 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4759 if(FFABS(height/2-y)<9) printf("\n");