2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
28 #include "mpegvideo.h"
/*
 * quant3: 256-entry lookup table producing one of three levels (-1, 0, +1).
 * Visible entries: indices 0..1 -> 0, 2..127 -> 1, 128..254 -> -1, 255 -> 0.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so the
 * upper half stands for negative inputs -- confirm at the use site.
 */
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/*
 * quant3b: 256-entry three-level lookup table (-1, 0, +1).
 * Visible entries: index 0 -> 0, 1..127 -> 1, 128..255 -> -1; unlike quant3
 * only index 0 maps to zero.
 */
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/*
 * quant3bA: 256-entry three-level lookup table (-1, 0, +1).
 * Visible entries: indices 0 and 1 -> 0; from index 2 on the values alternate,
 * even indices -> 1 and odd indices -> -1.
 * NOTE(review): the alternating layout suggests the sign is carried in the low
 * bit of the index -- confirm against the code that builds the index.
 */
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/*
 * quant5: 256-entry lookup table producing five levels (-2..2).
 * Visible entries: 0 -> 0, 1..3 -> 1, 4..127 -> 2, 128..252 -> -2,
 * 253..255 -> -1.
 */
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/*
 * quant7: 256-entry lookup table producing seven levels (-3..3).
 * The lower half (indices 0..127) holds the non-negative levels with
 * thresholds growing with the index; the upper half (128..255) holds the
 * negative levels, ending at -1 for the last two indices.
 */
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/*
 * quant9: 256-entry lookup table producing nine levels (-4..4).
 * Indices 0..127 hold the non-negative levels (0 at index 0, rising to 4);
 * indices 128..255 hold the negative levels (-4 down to -1 at the end).
 */
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/*
 * quant11: 256-entry lookup table producing eleven levels (-5..5).
 * Indices 0..127 hold the non-negative levels (0 at index 0, rising to 5);
 * indices 128..255 hold the negative levels (-5 down to -1 at index 255).
 */
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/*
 * quant13: 256-entry lookup table producing thirteen levels (-6..6).
 * Indices 0..127 hold the non-negative levels (0 at index 0, rising to 6);
 * indices 128..255 hold the negative levels (-6 down to -1 at index 255).
 */
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/*
 * obmc32: 1024 weights written as 32 rows of 32 values; zero on the border
 * rows, rising to 255 in the two centre rows.
 * NOTE(review): the name suggests an overlapped-block motion compensation
 * window -- confirm at the use site. This identifier is defined three times in
 * this listing; presumably each variant sits in a different preprocessor
 * branch that is not visible here.
 */
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * obmc16: 256 weights written as 16 rows of 16 values, peaking at 248 in the
 * centre. Follows the first obmc32 variant in this listing.
 * NOTE(review): defined three times in this listing; presumably selected by
 * preprocessor conditionals that are not visible here.
 */
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/*
 * obmc32 (second variant in this listing): 32 rows of 32 weights, peaking at
 * 240 in the two centre rows; unlike the first variant the border rows are not
 * all zero.
 * NOTE(review): presumably an alternative window shape chosen by a
 * preprocessor conditional that is not visible here.
 */
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/*
 * obmc16 (second variant in this listing): 16 rows of 16 weights, peaking at
 * 224 in the centre; follows the second obmc32 variant.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible here.
 */
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/*
 * obmc32 (third variant in this listing): 32 rows of 32 weights, zero on the
 * border rows and peaking at 255 in the two centre rows.
 * NOTE(review): presumably the remaining branch of a preprocessor conditional
 * that is not visible here; which variant is compiled cannot be told from this
 * listing.
 */
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * obmc16 (third variant in this listing): 16 rows of 16 weights, peaking at
 * 252 in the centre; follows the third obmc32 variant.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible here.
 */
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/*
 * obmc8: 64 weights written as 8 rows of 8 values, ranging from 4 in the
 * corners to 196 in the centre. Only one definition is visible, unlike
 * obmc32 and obmc16.
 */
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
/* obmc4: 16-entry weight table; its initializer values are not visible in this listing. */
359 static const uint8_t obmc4[16]={
/*
 * obmc_tab: the four weight tables ordered from largest to smallest, so
 * obmc_tab[i] is the table whose declared size is (32>>i)*(32>>i) entries.
 */
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
/*
 * File-scope MAX_REF_FRAMES x MAX_REF_FRAMES table (zero until written, as it
 * has static storage).
 * NOTE(review): presumably motion-vector scale factors between pairs of
 * reference frames -- confirm where it is filled in.
 */
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/*
 * BlockNode: per-block record. Only the `level` member and the BLOCK_INTRA
 * flag value are visible in this listing; the remaining members and the
 * closing brace are not shown.
 */
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
// Flag value 1; NOTE(review): presumably stored in a `type` member that is not visible here.
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
/*
 * null_block: constant BlockNode with all three colour components set to 128;
 * any further initializers are not visible in this listing.
 * NOTE(review): presumably used as the neighbour for blocks outside the
 * picture -- confirm at the use site.
 */
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
/* Block-size constants: MB_SIZE is 1<<LOG2_MB_SIZE, i.e. 16. */
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): presumably extra precision bits kept on the encoder side -- confirm at the use site. */
397 #define ENCODER_EXTRA_BITS 4
/* x_and_coeff: members are not visible in this listing; SubBand holds a pointer to it (x_coeff). */
400 typedef struct x_and_coeff{
/*
 * SubBand: description of one wavelet subband. Only some members are visible
 * in this listing.
 */
405 typedef struct SubBand{
410 int qlog; ///< log(qscale)/log[2^(1/6)]
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
/* Link to another SubBand; NOTE(review): presumably the band at the next coarser level -- confirm. */
417 struct SubBand *parent;
/* 7 + 512 contexts of 32 bytes each; the 32-byte width matches the state+0..state+31 offsets used by put_symbol(). */
418 uint8_t state[/*7*2*/ 7 + 512][32];
/*
 * Plane: per-plane state. Only some members are visible in this listing.
 */
421 typedef struct Plane{
/* Four SubBand slots for each of up to MAX_DECOMPOSITIONS levels. */
424 SubBand band[MAX_DECOMPOSITIONS][4];
/* Current and previous filter coefficient sets, HTAPS_MAX/2 entries each. */
427 int8_t hcoeff[HTAPS_MAX/2];
432 int8_t last_hcoeff[HTAPS_MAX/2];
/*
 * SnowContext: top-level codec state. Only part of the member list is visible
 * in this listing. Several settings come in a current/"last_" pair (e.g.
 * spatial_decomposition_type / last_spatial_decomposition_type).
 * NOTE(review): the "last_" copies presumably hold the previously coded value
 * for delta signalling -- confirm in the header coding routines.
 */
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
439 AVCodecContext *avctx;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
/* One 32-byte state block, the same width put_symbol()/get_symbol() index into. */
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
/* Per-reference-frame arrays; allocation is not visible in this listing. */
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
/* Separate buffers for the forward (DWTELEM) and inverse (IDWTELEM) transforms. */
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
469 int spatial_scalability;
479 #define QBIAS_SHIFT 3
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/*
 * Return the buffer bound to line_num, calling slice_buffer_load_line() to
 * bind one only when the slot is still NULL.
 * Both arguments are expanded more than once, so they must be free of side
 * effects.
 */
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not part of this listing. */
507 static void iterative_me(SnowContext *s);
/*
 * Set up a slice_buffer: a table of line_count line slots (all NULL after
 * av_mallocz) plus a stack of max_allocated_lines row buffers, each holding
 * line_width IDWTELEMs. data_stack_top is left pointing at the last stack
 * entry, i.e. every row buffer starts out free.
 *
 * NOTE(review): no check of the av_mallocz()/av_malloc() results is visible in
 * this listing; a failed allocation would be dereferenced in the loop below or
 * by later users -- confirm whether callers guard against this.
 */
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
520 for(i = 0; i < max_allocated_lines; i++){
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
524 buf->data_stack_top = max_allocated_lines - 1;
/*
 * Bind a free row buffer to `line`: take the entry on top of data_stack,
 * decrement data_stack_top and store the buffer in buf->line[line].
 * The assert requires that at least one free buffer remains.
 * NOTE(review): the early `return buf->line[line]` is presumably guarded by a
 * "line already bound" test on a line not visible here.
 */
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
534 return buf->line[line];
536 buffer = buf->data_stack[buf->data_stack_top];
537 buf->data_stack_top--;
538 buf->line[line] = buffer;
/*
 * Unbind the row buffer attached to `line`: push it back onto data_stack and
 * reset the line slot to NULL. The asserts require a valid line index and a
 * currently bound buffer.
 */
543 static void slice_buffer_release(slice_buffer * buf, int line)
547 assert(line >= 0 && line < buf->line_count);
548 assert(buf->line[line]);
550 buffer = buf->line[line];
551 buf->data_stack_top++;
552 buf->data_stack[buf->data_stack_top] = buffer;
553 buf->line[line] = NULL;
/*
 * Walk every line slot and release it via slice_buffer_release().
 * NOTE(review): the release is presumably guarded by an `if(buf->line[i])`
 * on a line not visible here; slice_buffer_release() asserts the slot is bound.
 */
556 static void slice_buffer_flush(slice_buffer * buf)
559 for(i = 0; i < buf->line_count; i++){
561 slice_buffer_release(buf, i);
/*
 * Tear down a slice_buffer: flush first so the row buffers are back on
 * data_stack, then free each of the data_count row buffers, the stack array
 * and the line table. av_freep() is passed the address of each pointer.
 */
565 static void slice_buffer_destroy(slice_buffer * buf)
568 slice_buffer_flush(buf);
570 for(i = buf->data_count - 1; i >= 0; i--){
571 av_freep(&buf->data_stack[i]);
573 av_freep(&buf->data_stack);
574 av_freep(&buf->line);
578 // Avoid a name clash on SGI IRIX
/* NOTE(review): the statement the comment above refers to (lines 579-580 of the original file) is not visible in this listing. */
581 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* QROOT-entry byte table; it is not const, so it is filled at run time somewhere outside this listing. */
582 static uint8_t qexp[QROOT];
/*
 * Fold index v into the range 0..m. The unsigned comparison is true both for
 * v > m and for negative v, so the loop runs until v lies inside the range.
 * The loop body is not visible in this listing.
 */
584 static inline int mirror(int v, int m){
585 while((unsigned)v > (unsigned)m){
/*
 * Range-code the integer v using the adaptive states at `state`.
 * Layout of the state bytes, as given by the offsets and trailing comments:
 *   state+0       zero flag (0 = non-zero value follows, 1 = value is zero)
 *   state+1..10   exponent, one bit per step up to e = av_log2(|v|)
 *   state+11..21  sign bit (only where the code is guarded by is_signed)
 *   state+22..31  mantissa bits of |v|, most significant first
 * Indices are clamped (FFMIN / el) so large exponents share the last state.
 * NOTE(review): three variants of the non-zero path are visible; presumably
 * they sit in preprocessor or if/else branches whose control lines are missing
 * from this listing, so which one is compiled cannot be told here.
 */
592 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
596 const int a= FFABS(v);
597 const int e= av_log2(a);
599 const int el= FFMIN(e, 10);
600 put_rac(c, state+0, 0);
603 put_rac(c, state+1+i, 1); //1..10
606 put_rac(c, state+1+9, 1); //1..10
608 put_rac(c, state+1+FFMIN(i,9), 0);
610 for(i=e-1; i>=el; i--){
611 put_rac(c, state+22+9, (a>>i)&1); //22..31
614 put_rac(c, state+22+i, (a>>i)&1); //22..31
618 put_rac(c, state+11 + el, v < 0); //11..21
621 put_rac(c, state+0, 0);
624 put_rac(c, state+1+i, 1); //1..10
626 put_rac(c, state+1+i, 0);
628 for(i=e-1; i>=0; i--){
629 put_rac(c, state+22+i, (a>>i)&1); //22..31
633 put_rac(c, state+11 + e, v < 0); //11..21
636 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
638 put_rac(c, state+1+FFMIN(i,9), 0);
640 for(i=e-1; i>=0; i--){
641 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
645 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
649 put_rac(c, state+0, 1);
/*
 * Decode an integer written by put_symbol(), using the same state layout:
 * state+0 zero flag, state+1.. exponent bits, state+22.. mantissa bits and
 * state+11.. sign (read only when is_signed is set). Indices are clamped with
 * FFMIN exactly as in the FFMIN-based variant of put_symbol().
 * NOTE(review): the value returned on each path is on lines not visible here;
 * presumably 0 when the zero flag is set.
 */
653 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
654 if(get_rac(c, state+0))
659 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
664 for(i=e-1; i>=0; i--){
665 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
668 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/*
 * Range-code v with a scheme parameterised by log2: a run of 1 bits and a
 * terminating 0 bit are coded with state+4+log2, then the low log2 bits of v
 * are written most significant first using state+31-i.
 * r starts at 1<<log2 (or 1 when log2 is negative).
 * NOTE(review): the loop that emits the 1 bits and updates r/log2 is on lines
 * not visible here; the description of it is inferred from get_symbol2().
 */
675 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
677 int r= log2>=0 ? 1<<log2 : 1;
683 put_rac(c, state+4+log2, 1);
688 put_rac(c, state+4+log2, 0);
690 for(i=log2-1; i>=0; i--){
691 put_rac(c, state+31-i, (v>>i)&1);
/*
 * Decode a value written by put_symbol2(): read bits with state+4+log2 until
 * a 0 is seen, then add the low log2 bits read with state+31-i.
 * NOTE(review): the body of the while loop (how v, r and log2 advance) is on
 * lines not visible here.
 */
695 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
697 int r= log2>=0 ? 1<<log2 : 1;
702 while(get_rac(c, state+4+log2)){
708 for(i=log2-1; i>=0; i--){
709 v+= get_rac(c, state+31-i)<<i;
/*
 * One lifting step on DWTELEM data:
 *   dst = src +/- ((mul * (ref_left + ref_right) + add) >> shift)
 * with the sign chosen by `inverse` (LIFT subtracts when it is non-zero).
 * At an edge the single available neighbour is counted twice (mul*2*ref).
 * highpass selects which edge is mirrored (mirror_left = !highpass) and w is
 * the index of the last element handled.
 * NOTE(review): the `if(mirror_left)`/`if(mirror_right)` guards, the loop
 * header and the dst assignments are on lines not visible here.
 */
715 static av_always_inline void
716 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
717 int dst_step, int src_step, int ref_step,
718 int width, int mul, int add, int shift,
719 int highpass, int inverse){
720 const int mirror_left= !highpass;
721 const int mirror_right= (width&1) ^ highpass;
722 const int w= (width>>1) - 1 + (highpass & width);
725 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
727 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
734 LIFT(src[i*src_step],
735 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
741 LIFT(src[w*src_step],
742 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Same lifting step as lift(), but operating on IDWTELEM data; the visible
 * statements are otherwise identical, including the redefinition of LIFT.
 * NOTE(review): the edge guards, loop header and dst assignments are on lines
 * not visible here.
 */
747 static av_always_inline void
748 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
749 int dst_step, int src_step, int ref_step,
750 int width, int mul, int add, int shift,
751 int highpass, int inverse){
752 const int mirror_left= !highpass;
753 const int mirror_right= (width&1) ^ highpass;
754 const int w= (width>>1) - 1 + (highpass & width);
757 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
759 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
766 LIFT(src[i*src_step],
767 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
773 LIFT(src[w*src_step],
774 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Lifting step variant on DWTELEM data in which the update also depends on
 * the sample itself. LIFTS has two arms: one computes
 * src + ((ref + 4*src) >> shift); the other undoes it with a division by 5*4,
 * using the (5<<25) / (1<<23) offsets so the dividend stays positive.
 * Callers pass `ref` already combined as mul*(left+right)+add.
 * NOTE(review): the line selecting between the two arms (presumably on `inv`)
 * and the edge guards / loop header are not visible here.
 */
780 static av_always_inline void
781 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
782 int dst_step, int src_step, int ref_step,
783 int width, int mul, int add, int shift,
784 int highpass, int inverse){
785 const int mirror_left= !highpass;
786 const int mirror_right= (width&1) ^ highpass;
787 const int w= (width>>1) - 1 + (highpass & width);
791 #define LIFTS(src, ref, inv) \
793 (src) + (((ref) + 4*(src))>>shift): \
794 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
796 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
803 LIFTS(src[i*src_step],
804 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
810 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/*
 * Same step as liftS(), but operating on IDWTELEM data; the visible
 * statements are otherwise identical, including the redefinition of LIFTS.
 * NOTE(review): the arm-selection line of LIFTS, the edge guards and the loop
 * header are on lines not visible here.
 */
813 static av_always_inline void
814 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
815 int dst_step, int src_step, int ref_step,
816 int width, int mul, int add, int shift,
817 int highpass, int inverse){
818 const int mirror_left= !highpass;
819 const int mirror_right= (width&1) ^ highpass;
820 const int w= (width>>1) - 1 + (highpass & width);
824 #define LIFTS(src, ref, inv) \
826 (src) + (((ref) + 4*(src))>>shift): \
827 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
829 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
836 LIFTS(src[i*src_step],
837 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
843 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Horizontal forward transform of one row b of `width` samples.
 * Odd-indexed samples are copied to temp[w2..]; two lift() passes then
 * write the result back into b, one half at b+w2 and the other at b.
 * NOTE(review): the declaration of temp, the copy of the even samples and
 * any preprocessor lines around the A1..A4 statements are not visible in
 * this excerpt; the A1..A4 code may be a disabled alternative to the two
 * lift() calls - TODO confirm.
 */
848 static void horizontal_decompose53i(DWTELEM *b, int width){
850 const int width2= width>>1;
/* w2: offset of the second half of temp / b (rounded up for odd widths) */
852 const int w2= (width+1)>>1;
854 for(x=0; x<width2; x++){
856 temp[x+w2]= b[2*x + 1];
870 for(x=1; x+1<width2; x+=2){
874 A2 += (A1 + A3 + 2)>>2;
878 A1= temp[x+1+width2];
881 A4 += (A1 + A3 + 2)>>2;
887 A2 += (A1 + A3 + 2)>>2;
/* arguments presumably follow inv_lift's order:
   dst, src, ref, steps, width, mul, add, shift, highpass, inverse */
892 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
893 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * Vertical forward step over three rows: for every column i in
 * [0, width), subtracts the floored half-sum of b0[i] and b2[i] from
 * b1[i]. Only b1 is written.
 */
897 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
900 for(i=0; i<width; i++){
901 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * Vertical forward step over three rows: for every column i in
 * [0, width), adds (b0[i] + b2[i] + 2) >> 2 to b1[i].
 * Only b1 is written.
 */
905 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
908 for(i=0; i<width; i++){
909 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * Forward 2-D transform of a width x height plane stored with `stride`
 * elements between rows. Walks the rows two at a time; each newly reached
 * row is first transformed horizontally, then the vertical H0/L0 steps
 * are applied to the row triples (b1,b2,b3) and (b0,b1,b2).
 * Row indices outside the picture are remapped through mirror().
 * NOTE(review): the lines that advance b0/b1 at the end of each iteration
 * are not visible in this excerpt.
 */
913 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
915 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
916 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
/* start two rows above the picture so the first real rows see their neighbours */
918 for(y=-2; y<height; y+=2){
919 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
920 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* comparing against (unsigned)height rejects negative row numbers and
   rows >= height in a single test */
922 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
923 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
925 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
926 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
/**
 * Horizontal forward transform of one row b of `width` samples using
 * four lifting passes with the W_A*, W_B*, W_C* and W_D* constants.
 * The first two passes read b with a stride of 2 (interleaved samples)
 * and write into temp; the last two passes write the two halves of the
 * result back into b (at b+w2 and at b).
 * NOTE(review): the declaration of temp is on a line not visible here.
 */
933 static void horizontal_decompose97i(DWTELEM *b, int width){
/* w2: offset of the second half of temp / b (rounded up for odd widths) */
935 const int w2= (width+1)>>1;
937 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
938 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
939 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
940 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * Vertical forward step: for every column i in [0, width), subtracts
 * (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS from b1[i]. Only b1 is written.
 */
944 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
947 for(i=0; i<width; i++){
948 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical forward step: for every column i in [0, width), adds
 * (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS to b1[i]. Only b1 is written.
 */
952 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
955 for(i=0; i<width; i++){
956 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical forward step using the W_B* constants; only b1 is written.
 * NOTE(review): the two statements in the loop look like alternatives
 * chosen by preprocessor lines that are not visible in this excerpt
 * (the second is the division-based form that also appears in the LIFTS
 * macro) - TODO confirm which one is compiled.
 */
960 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
963 for(i=0; i<width; i++){
965 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
967 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * Vertical forward step: for every column i in [0, width), adds
 * (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS to b1[i]. Only b1 is written.
 */
972 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
975 for(i=0; i<width; i++){
976 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Forward 2-D transform of a width x height plane stored with `stride`
 * elements between rows. Walks the rows two at a time with a six-row
 * window b0..b5; each newly reached row is transformed horizontally,
 * then the four vertical steps H0, L0, H1, L1 are applied to successively
 * older row triples. Row indices outside the picture are remapped
 * through mirror().
 * NOTE(review): the lines that shift b0..b3 at the end of each iteration
 * are not visible in this excerpt.
 */
980 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
982 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
983 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
984 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
985 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
/* start four rows above the picture so the window is primed */
987 for(y=-4; y<height; y+=2){
988 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
989 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* comparing against (unsigned)height rejects negative row numbers and
   rows >= height in a single test */
991 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
992 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
994 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
995 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
996 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
997 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
/**
 * Applies `decomposition_count` levels of the forward transform to
 * buffer in place. At each level the working width and height are
 * shifted right by `level` and the row stride is shifted left by `level`.
 * DWT_97 and DWT_53 select the corresponding spatial_decompose routine
 * (the switch statement itself is on a line not visible here, presumably
 * switching on `type`).
 */
1006 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1009 for(level=0; level<decomposition_count; level++){
1011 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1012 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Horizontal inverse transform of one row b of `width` samples.
 * Two inv_lift() passes rebuild the two half-rows in the variable-length
 * scratch array temp, which is then re-interleaved into b (odd positions
 * take temp[x+w2]).
 * NOTE(review): the store to the even positions and any preprocessor
 * lines around the A1..A4 statements are not visible in this excerpt; the
 * A1..A4 code may be a disabled alternative to the inv_lift() calls -
 * TODO confirm.
 */
1017 static void horizontal_compose53i(IDWTELEM *b, int width){
/* scratch row on the stack, one element per sample */
1018 IDWTELEM temp[width];
1019 const int width2= width>>1;
/* w2: offset of the second half of temp / b (rounded up for odd widths) */
1020 const int w2= (width+1)>>1;
1032 for(x=1; x+1<width2; x+=2){
1036 A2 += (A1 + A3 + 2)>>2;
1040 A1= temp[x+1+width2];
1043 A4 += (A1 + A3 + 2)>>2;
1049 A2 += (A1 + A3 + 2)>>2;
/* same constants as the forward lift() calls, with inverse=1 and in
   reverse order */
1053 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1054 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1056 for(x=0; x<width2; x++){
1058 b[2*x + 1]= temp[x+w2];
/**
 * Vertical inverse step: for every column i in [0, width), adds the
 * floored half-sum of b0[i] and b2[i] to b1[i] (the opposite sign of
 * vertical_decompose53iH0). Only b1 is written.
 */
1064 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1067 for(i=0; i<width; i++){
1068 b1[i] += (b0[i] + b2[i])>>1;
/**
 * Vertical inverse step: for every column i in [0, width), subtracts
 * (b0[i] + b2[i] + 2) >> 2 from b1[i] (the opposite sign of
 * vertical_decompose53iL0). Only b1 is written.
 */
1072 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Primes a DWTCompose state for the slice-buffer based inverse
 * transform: b0 and b1 are set to the lines obtained from the slice
 * buffer for the mirrored row numbers -2 and -1.
 * NOTE(review): any further initialisation (e.g. of cs->y) is on a line
 * not visible in this excerpt.
 */
1080 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1081 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1082 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Primes a DWTCompose state for the flat-buffer inverse transform:
 * b0 and b1 point at the rows of buffer for the mirrored row numbers
 * -2 and -1.
 * NOTE(review): any further initialisation (e.g. of cs->y) is on a line
 * not visible in this excerpt.
 */
1086 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1087 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1088 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * Advances the slice-buffer based inverse transform by one step.
 * Takes the two carried rows from cs, fetches the next two rows from the
 * slice buffer, applies the vertical L0 and H0 steps and then the
 * horizontal inverse transform to b0 and b1.
 * NOTE(review): the definition of y (presumably cs->y) and the lines
 * that update cs afterwards are not visible in this excerpt.
 */
1092 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1095 IDWTELEM *b0= cs->b0;
1096 IDWTELEM *b1= cs->b1;
1097 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1098 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* comparing against (unsigned)height rejects negative row numbers and
   rows >= height in a single test */
1100 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1101 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1103 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1104 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * Advances the flat-buffer inverse transform by one step.
 * Takes the two carried rows from cs, addresses the next two rows in
 * buffer, applies the vertical L0 and H0 steps and then the horizontal
 * inverse transform to b0 and b1.
 * NOTE(review): the definition of y (presumably cs->y) and the lines
 * that update cs afterwards are not visible in this excerpt.
 */
1111 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1113 IDWTELEM *b0= cs->b0;
1114 IDWTELEM *b1= cs->b1;
1115 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1116 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* comparing against (unsigned)height rejects negative row numbers and
   rows >= height in a single test */
1118 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1119 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1121 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1122 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * Whole-plane inverse transform: initialises a local DWTCompose state
 * and calls spatial_compose53i_dy() until cs.y exceeds height.
 * Marked av_unused, so it may have no callers.
 */
1129 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1131 spatial_compose53i_init(&cs, buffer, height, stride);
1132 while(cs.y <= height)
1133 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Horizontal inverse transform of one row b of `width` samples using
 * four inverse lifting passes with the W_D*, W_C*, W_B* and W_A*
 * constants (the reverse order of horizontal_decompose97i).
 * The first two passes build the half-rows in the scratch array temp;
 * the last two write back into b with a destination stride of 2, which
 * re-interleaves the samples.
 */
1137 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
/* scratch row on the stack, one element per sample */
1138 IDWTELEM temp[width];
/* w2: offset of the second half of temp / b (rounded up for odd widths) */
1139 const int w2= (width+1)>>1;
1141 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1142 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1143 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1144 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/**
 * Vertical inverse step: for every column i in [0, width), adds
 * (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS to b1[i]. Only b1 is written.
 */
1147 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1150 for(i=0; i<width; i++){
1151 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical inverse step: for every column i in [0, width), subtracts
 * (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS from b1[i]. Only b1 is written.
 */
1155 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1158 for(i=0; i<width; i++){
1159 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical inverse step using the W_B* constants; only b1 is written.
 * NOTE(review): the two statements in the loop look like alternatives
 * chosen by preprocessor lines that are not visible in this excerpt
 * (the second adds 4*b1[i] inside the shifted term, like the first
 * LIFTS expression) - TODO confirm which one is compiled.
 */
1163 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1166 for(i=0; i<width; i++){
1168 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1170 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * Vertical inverse step: for every column i in [0, width), subtracts
 * (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS from b1[i]. Only b1 is written.
 */
1175 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1178 for(i=0; i<width; i++){
1179 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused vertical inverse step over six rows: per column it performs the
 * same arithmetic as vertical_compose97iL1(b3,b4,b5),
 * vertical_compose97iH1(b2,b3,b4), vertical_compose97iL0(b1,b2,b3) and
 * vertical_compose97iH0(b0,b1,b2), in that order. Rows b1..b4 are
 * modified; b0 and b5 are only read.
 * NOTE(review): the two b2 updates look like alternatives chosen by
 * preprocessor lines not visible in this excerpt - TODO confirm.
 */
1183 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1186 for(i=0; i<width; i++){
1187 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1188 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1190 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1192 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1194 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Primes a DWTCompose state for the slice-buffer based inverse
 * transform: b0..b3 are set to the lines obtained from the slice buffer
 * for the mirrored row numbers -4, -3, -2 and -1.
 * NOTE(review): any further initialisation (e.g. of cs->y) is on a line
 * not visible in this excerpt.
 */
1198 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1199 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1200 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1201 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1202 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Primes a DWTCompose state for the flat-buffer inverse transform:
 * b0..b3 point at the rows of buffer for the mirrored row numbers
 * -4, -3, -2 and -1.
 * NOTE(review): any further initialisation (e.g. of cs->y) is on a line
 * not visible in this excerpt.
 */
1206 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1207 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1208 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1209 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1210 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * Advances the slice-buffer based inverse transform by one step.
 * Takes the four carried rows from cs, fetches the next two rows from
 * the slice buffer, applies the vertical steps and then the horizontal
 * inverse transform (through the dsp function pointers) to b0 and b1.
 * NOTE(review): the definition of y (presumably cs->y), the `else` that
 * presumably separates the fused call from the four individual calls,
 * and the lines that update cs afterwards are not visible here.
 */
1214 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1217 IDWTELEM *b0= cs->b0;
1218 IDWTELEM *b1= cs->b1;
1219 IDWTELEM *b2= cs->b2;
1220 IDWTELEM *b3= cs->b3;
1221 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1222 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* away from the top and bottom borders all four per-row range checks
   below would pass, so the fused six-row routine can be used */
1224 if(y>0 && y+4<height){
1225 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* border handling: each step is range-checked individually */
1227 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1228 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1229 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1230 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1233 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1234 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/**
 * Advances the flat-buffer inverse transform by one step.
 * Takes the four carried rows from cs, addresses the next two rows in
 * buffer, applies the four range-checked vertical steps and then the
 * horizontal inverse transform to b0 and b1.
 * NOTE(review): the definition of y (presumably cs->y) and the lines
 * that update cs afterwards are not visible in this excerpt.
 */
1243 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1245 IDWTELEM *b0= cs->b0;
1246 IDWTELEM *b1= cs->b1;
1247 IDWTELEM *b2= cs->b2;
1248 IDWTELEM *b3= cs->b3;
1249 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1250 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* comparing against (unsigned)height rejects negative row numbers and
   rows >= height in a single test */
1252 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1253 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1254 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1255 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1257 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1258 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/**
 * Whole-plane inverse transform: initialises a local DWTCompose state
 * and calls spatial_compose97i_dy() until cs.y exceeds height.
 * Marked av_unused, so it may have no callers.
 */
1267 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1269 spatial_compose97i_init(&cs, buffer, height, stride);
1270 while(cs.y <= height)
1271 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialises one DWTCompose entry per decomposition level (cs[level])
 * for the slice-buffer based inverse transform, from the highest level
 * down to 0. Height is shifted right and stride_line shifted left by
 * `level`. `width` is not used in the visible lines.
 * The switch statement itself is on a line not visible here (presumably
 * switching on `type`).
 */
1274 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1276 for(level=decomposition_count-1; level>=0; level--){
1278 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1279 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/**
 * Initialises one DWTCompose entry per decomposition level (cs[level])
 * for the flat-buffer inverse transform, from the highest level down
 * to 0. Height is shifted right and stride shifted left by `level`.
 * `width` is not used in the visible lines.
 * The switch statement itself is on a line not visible here (presumably
 * switching on `type`).
 */
1284 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1286 for(level=decomposition_count-1; level>=0; level--){
1288 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1289 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/**
 * Runs the flat-buffer inverse transform far enough that output up to
 * row y is available: for each level, from the highest down to 0, the
 * per-level step function is called while cs[level].y has not passed
 * min((y>>level) + support, height>>level).
 * NOTE(review): the switch header and the `break` lines are not visible
 * in this excerpt; `type==1` presumably identifies DWT_53 - confirm.
 */
1294 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* number of extra rows each level must run ahead of the requested row */
1295 const int support = type==1 ? 3 : 5;
1299 for(level=decomposition_count-1; level>=0; level--){
1300 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1302 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1304 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Slice-buffer counterpart of the per-slice inverse transform: for each
 * level, from the highest down to 0, the per-level buffered step function
 * is called while cs[level].y has not passed
 * min((y>>level) + support, height>>level).
 * NOTE(review): the switch header and the `break` lines are not visible
 * in this excerpt; `type==1` presumably identifies DWT_53 - confirm.
 */
1311 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* number of extra rows each level must run ahead of the requested row */
1312 const int support = type==1 ? 3 : 5;
1316 for(level=decomposition_count-1; level>=0; level--){
1317 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1319 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1321 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Whole-plane inverse transform built on the per-slice API: initialises
 * one DWTCompose state per level (array of MAX_DECOMPOSITIONS entries)
 * and then requests output in steps of 4 rows until height is reached.
 */
1328 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1329 DWTCompose cs[MAX_DECOMPOSITIONS];
1331 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1332 for(y=0; y<height; y+=4)
1333 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Writes the coefficients of subband b (read from src, with the parent
 * band's samples in parent) to the range coder s->c.
 *
 * Visible structure:
 *  - a first scan gathers, for each coefficient, its causal neighbours
 *    l, lt, t, rt and the parent sample p; where all of them are zero the
 *    position belongs to a zero run and run lengths are stored in runs[];
 *  - the number of runs (max_index) is written, followed by the first run;
 *  - a second scan writes, for positions with a non-zero context, a
 *    significance flag (!!v) and otherwise consumes the run table; for
 *    coded coefficients the magnitude minus one and the sign follow.
 *
 * An error is logged when fewer than w*40 bytes remain in the output
 * buffer.
 * NOTE(review): the loop headers, the declarations of runs/run/v/p/px/py,
 * the return statements and several branch lines are not visible in this
 * excerpt - confirm details against the full file.
 */
1336 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1337 const int w= b->width;
1338 const int h= b->height;
/* ---- first scan: build the run table ---- */
1350 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1351 v= src[x + y*stride];
1354 t= src[x + (y-1)*stride];
1356 lt= src[x - 1 + (y-1)*stride];
1359 rt= src[x + 1 + (y-1)*stride];
1363 l= src[x - 1 + y*stride];
/* ll is commented out of the declaration above; these two assignments
   are presumably disabled by lines not visible here - TODO confirm */
1365 if(orientation==1) ll= src[y + (x-2)*stride];
1366 else ll= src[x - 2 + y*stride];
/* parent sample, only when (px,py) lies inside the parent band */
1372 if(px<b->parent->width && py<b->parent->height)
1373 p= parent[px + py*2*stride];
/* all context samples zero: this position is covered by run coding */
1375 if(!(/*ll|*/l|lt|t|rt|p)){
1377 runs[run_index++]= run;
1385 max_index= run_index;
1386 runs[run_index++]= run;
1388 run= runs[run_index++];
/* ---- header: number of runs, then the first run length ---- */
1390 put_symbol2(&s->c, b->state[30], max_index, 0);
1391 if(run_index <= max_index)
1392 put_symbol2(&s->c, b->state[1], run, 3);
1395 if(s->c.bytestream_end - s->c.bytestream < w*40){
1396 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* ---- second scan: write the coefficients ---- */
1401 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1402 v= src[x + y*stride];
1405 t= src[x + (y-1)*stride];
1407 lt= src[x - 1 + (y-1)*stride];
1410 rt= src[x + 1 + (y-1)*stride];
1414 l= src[x - 1 + y*stride];
1416 if(orientation==1) ll= src[y + (x-2)*stride];
1417 else ll= src[x - 2 + y*stride];
1423 if(px<b->parent->width && py<b->parent->height)
1424 p= parent[px + py*2*stride];
1426 if(/*ll|*/l|lt|t|rt|p){
/* context = log2 of a weighted sum of the neighbour magnitudes */
1427 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
/* significance flag: is the coefficient non-zero? */
1429 put_rac(&s->c, &b->state[0][context], !!v);
1432 run= runs[run_index++];
1434 if(run_index <= max_index)
1435 put_symbol2(&s->c, b->state[1], run, 3);
1443 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
/* l2/t2: magnitude doubled with the sign in the low bit, used to pick
   the sign-coding context below */
1444 int l2= 2*FFABS(l) + (l<0);
1445 int t2= 2*FFABS(t) + (t<0);
/* magnitude minus one, then the sign bit (1 when v is negative) */
1447 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1448 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Entry point for subband encoding: forwards all arguments to
 * encode_subband_c0run() and returns its result. The commented-out calls
 * name alternative coders that are not used.
 */
1456 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1457 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1458 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1459 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1460 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Reads the coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff. Each stored entry carries an x position and a
 * value v encoded as 2*magnitude + sign bit; an entry with x == w+1 is
 * written as an end marker. When `parent` is non-NULL its x_coeff list is
 * walked in parallel to supply the parent context p.
 * NOTE(review): the row/column loop headers, the declarations of
 * runs/run/v/l/p, the stores into xc and several branch lines are not
 * visible in this excerpt - confirm details against the full file.
 */
1463 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1464 const int w= b->width;
1465 const int h= b->height;
/* xc: write cursor; prev_xc / prev2_xc: cursors into earlier output */
1470 x_and_coeff *xc= b->x_coeff;
1471 x_and_coeff *prev_xc= NULL;
1472 x_and_coeff *prev2_xc= xc;
1473 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1474 x_and_coeff *prev_parent_xc= parent_xc;
/* number of runs, then (if any) the first run length */
1476 runs= get_symbol2(&s->c, b->state[30], 0);
1477 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482 int lt=0, t=0, rt=0;
1484 if(y && prev_xc->x == 0){
1496 if(prev_xc->x == x + 1)
/* advance / match the parent list at half resolution (x>>1) */
1502 if(x>>1 > parent_xc->x){
1505 if(x>>1 == parent_xc->x){
1506 p= parent_xc->coeff;
1509 if(/*ll|*/l|lt|t|rt|p){
/* neighbour values carry the sign in bit 0, hence >>1 (and &~1 for
   twice the magnitude of t) */
1510 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
/* significance flag */
1512 v=get_rac(&s->c, &b->state[0][context]);
/* magnitude, stored doubled, then the sign in the low bit */
1514 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1515 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1522 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
/* zero-context case: fixed contexts are used for magnitude and sign */
1524 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1525 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* how far the current run may be skipped before a non-zero context
   sample (previous row or parent) is reached */
1534 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1535 else max_run= FFMIN(run, w-x-1);
1537 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1543 (xc++)->x= w+1; //end marker
1549 while(parent_xc->x != parent->width+1)
1552 prev_parent_xc= parent_xc;
1554 parent_xc= prev_parent_xc;
1559 (xc++)->x= w+1; //end marker
/**
 * Dequantises rows [start_y, h) of subband b from the sparse list
 * b->x_coeff into the slice buffer sb. Each output line is cleared and
 * the listed coefficients are then written at their x positions.
 * save_state[0] holds the x_coeff read index so that a later call can
 * continue where this one stopped.
 * NOTE(review): the declarations of new_index/x/v/y, the body of the
 * lossless / LL-band branch and the inner loop header are not visible in
 * this excerpt.
 */
1563 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1564 const int w= b->width;
/* quantiser: qlog clipped to [0, QROOT*16], expanded through qexp[] */
1566 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1567 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1568 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1571 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1576 /* If we are on the second or later slice, restore our index. */
1578 new_index = save_state[0];
1581 for(y=start_y; y<h; y++){
1584 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1585 memset(line, 0, b->width*sizeof(IDWTELEM));
1586 v = b->x_coeff[new_index].coeff;
1587 x = b->x_coeff[new_index++].x;
/* v>>1 is the magnitude, v&1 the sign; (t^u)-u negates t when u is -1
   and leaves it unchanged when u is 0 */
1589 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1590 register int u= -(v&1);
1591 line[x] = (t^u) - u;
1593 v = b->x_coeff[new_index].coeff;
1594 x = b->x_coeff[new_index++].x;
1598 /* Save our variables for the next slice. */
1599 save_state[0] = new_index;
/**
 * Resets all adaptive coder states to MID_STATE: the per-band state
 * arrays of every plane, level and orientation, plus s->header_state and
 * s->block_state.
 */
1604 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1605 int plane_index, level, orientation;
1607 for(plane_index=0; plane_index<3; plane_index++){
1608 for(level=0; level<MAX_DECOMPOSITIONS; level++){
/* orientation 0 is only visited at level 0 */
1609 for(orientation=level ? 1:0; orientation<4; orientation++){
1610 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1614 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1615 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocates the zero-initialised s->block array.
 * w and h are the picture width and height divided by 2^LOG2_MB_SIZE and
 * rounded up; the element count is w*h scaled by 4^block_max_depth.
 * NOTE(review): no check of the av_mallocz() result is visible in this
 * excerpt, and the return statement and any stores of w/h are on lines
 * not shown - confirm how allocation failure is reported.
 */
1618 static int alloc_blocks(SnowContext *s){
/* -((-x)>>n) is x/2^n rounded up */
1619 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1620 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Copies range coder state from s to d while keeping d's own output
 * buffer position: d->bytestream and d->bytestream_start are saved first
 * and restored at the end.
 * NOTE(review): the copy itself is on a line not visible in this excerpt
 * (presumably a struct assignment from *s to *d).
 */
1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631 uint8_t *bytestream= d->bytestream;
1632 uint8_t *bytestream_start= d->bytestream_start;
1634 d->bytestream= bytestream;
1635 d->bytestream_start= bytestream_start;
/**
 * Walks a w x w block of bytes starting at pix, with line_size bytes
 * between the starts of consecutive rows, and returns an int computed
 * from it (presumably the sum of all samples, per the name; the
 * accumulation and return lines are not visible in this excerpt).
 */
1638 //near copy & paste from dsputil, FIXME
1639 static int pix_sum(uint8_t * pix, int line_size, int w)
1644 for (i = 0; i < w; i++) {
1645 for (j = 0; j < w; j++) {
/* skip from the end of this row to the start of the next */
1649 pix += line_size - w;
/**
 * Walks a w x w block of bytes starting at pix, with line_size bytes
 * between the starts of consecutive rows, using the lookup table
 * ff_squareTbl offset by 256 (presumably to return the sum of squared
 * samples; the accumulation and return lines are not visible in this
 * excerpt).
 */
1654 //near copy & paste from dsputil, FIXME
1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
1658 uint32_t *sq = ff_squareTbl + 256;
1661 for (i = 0; i < w; i++) {
1662 for (j = 0; j < w; j ++) {
/* skip from the end of this row to the start of the next */
1666 pix += line_size - w;
/**
 * Fills the square of block_w x block_w entries of s->block that
 * corresponds to quadtree node (x, y) at `level` with one BlockNode
 * value. The s->block array is addressed at the finest depth, with w
 * entries per row.
 * NOTE(review): the construction of `block` from l, cb, cr, mx, my, ref
 * and type is on lines not visible in this excerpt.
 */
1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* row length of s->block at the finest depth */
1672 const int w= s->b_width << s->block_max_depth;
1673 const int rem_depth= s->block_max_depth - level;
1674 const int index= (x + y*w) << rem_depth;
/* side length of this node measured in finest-depth blocks */
1675 const int block_w= 1<<rem_depth;
1688 for(j=0; j<block_w; j++){
1689 for(i=0; i<block_w; i++){
1690 s->block[index + i + j*w]= block;
/**
 * Points the motion estimation context at the source and reference
 * planes for a block at pixel position (x, y): c->src[0][i] is set to
 * src[i] and c->ref[0][i] to ref[i] plus a per-plane offset.
 * The offsets for planes 1 and 2 are (y*uvstride + x) >> 1.
 * NOTE(review): the offset for plane 0 and the loop header are on lines
 * not visible here; ref2 and ref_index are not used in the visible lines.
 */
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696 const int offset[3]= {
1698 ((y*c->uvstride + x)>>1),
1699 ((y*c->uvstride + x)>>1),
1703 c->src[0][i]= src [i];
1704 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predicts a motion vector for a block from its left, top and top-right
 * neighbours and stores it in *mx / *my.
 * With a single reference frame the prediction is the component-wise
 * mid_pred() of the three neighbour vectors. Otherwise each neighbour
 * vector is first multiplied by scale_mv_ref[ref][neighbour->ref],
 * rounded (+128) and shifted right by 8, and mid_pred() is taken over
 * the scaled values.
 */
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1711 if(s->ref_frames == 1){
1712 *mx = mid_pred(left->mx, top->mx, tr->mx);
1713 *my = mid_pred(left->my, top->my, tr->my);
/* scale factors for predicting `ref` from each possible neighbour ref */
1715 const int *scale = scale_mv_ref[ref];
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717 (top ->mx * scale[top ->ref] + 128) >>8,
1718 (tr ->mx * scale[tr ->ref] + 128) >>8);
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720 (top ->my * scale[top ->ref] + 128) >>8,
1721 (tr ->my * scale[tr ->ref] + 128) >>8);
/* Named slots of the predictor array P that encode_q_branch() fills in
   and passes to the motion search (the other P_* slots are defined on
   lines not visible in this excerpt). */
1728 #define P_TOPRIGHT P[3]
1729 #define P_MEDIAN P[4]
1731 #define FLAG_QPEL 1 //must be 1
/**
 * Encoder decision for one node (x, y) of the block quadtree at `level`.
 *
 * Visible steps:
 *  1. locate the neighbouring blocks (null_block stands in at picture
 *     borders) and derive the coding contexts from them;
 *  2. set up the motion search window and the predictor candidates;
 *  3. run the motion search against every reference frame;
 *  4. trial-encode the block as inter (pc / p_buffer / p_state) and as
 *     intra (ic / i_buffer / i_state) into scratch buffers and compute a
 *     score for each;
 *  5. unless already at block_max_depth, recurse into the four child
 *     nodes and compare their summed score;
 *  6. commit the chosen alternative by copying its scratch bytes and
 *     context state back into s->c and s->block_state and calling
 *     set_blocks().
 *
 * Returns an int; the recursion adds up the children's return values as
 * a score.
 * NOTE(review): many lines (declarations of P, pc, ic, mx, my, pmx, pmy,
 * l, cb, cr; several branches; all return statements) are not visible in
 * this excerpt - confirm details against the full file.
 */
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output and context-state copies for the inter (p_) and
   intra (i_) trial encodings */
1734 uint8_t p_buffer[1024];
1735 uint8_t i_buffer[1024];
1736 uint8_t p_state[sizeof(s->block_state)];
1737 uint8_t i_state[sizeof(s->block_state)];
/* remember the real coder's output position so the chosen trial can be
   copied there */
1739 uint8_t *pbbak= s->c.bytestream;
1740 uint8_t *pbbak_start= s->c.bytestream_start;
1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742 const int w= s->b_width << s->block_max_depth;
1743 const int h= s->b_height << s->block_max_depth;
1744 const int rem_depth= s->block_max_depth - level;
1745 const int index= (x + y*w) << rem_depth;
/* block side length in pixels at this level */
1746 const int block_w= 1<<(LOG2_MB_SIZE - level);
1747 int trx= (x+1)<<rem_depth;
1748 int try= (y+1)<<rem_depth;
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour predictors come from the left neighbour */
1755 int pl = left->color[0];
1756 int pcb= left->color[1];
1757 int pcr= left->color[2];
1761 const int stride= s->current_picture.linesize[0];
1762 const int uvstride= s->current_picture.linesize[1];
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1767 int16_t last_mv[3][2];
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769 const int shift= 1+qpel;
1770 MotionEstContext *c= &s->m.me;
1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773 int my_context= av_log2(2*FFABS(left->my - top->my));
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775 int ref, best_ref, ref_score, ref_mx, ref_my;
1777 assert(sizeof(s->block_state) >= 256);
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1783 // clip predictors / edge ?
1785 P_LEFT[0]= left->mx;
1786 P_LEFT[1]= left->my;
1789 P_TOPRIGHT[0]= tr->mx;
1790 P_TOPRIGHT[1]= tr->my;
/* candidate vectors: this block's current entry, right and bottom */
1792 last_mv[0][0]= s->block[index].mx;
1793 last_mv[0][1]= s->block[index].my;
1794 last_mv[1][0]= right->mx;
1795 last_mv[1][1]= right->my;
1796 last_mv[2][0]= bottom->mx;
1797 last_mv[2][1]= bottom->my;
1804 assert(c-> stride == stride);
1805 assert(c->uvstride == uvstride);
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* search window relative to this block, extending 16-2 pixels beyond
   the block grid on each side */
1812 c->xmin = - x*block_w - 16+2;
1813 c->ymin = - y*block_w - 16+2;
1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* clamp the predictors to the search window (window is in pixels, the
   predictors are scaled by 1<<shift) */
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1829 c->pred_x= P_LEFT[0];
1830 c->pred_y= P_LEFT[1];
1832 c->pred_x = P_MEDIAN[0];
1833 c->pred_y = P_MEDIAN[1];
/* ---- motion search over all reference frames ---- */
1838 for(ref=0; ref<s->ref_frames; ref++){
1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1844 assert(ref_mx >= c->xmin);
1845 assert(ref_mx <= c->xmax);
1846 assert(ref_my >= c->ymin);
1847 assert(ref_my <= c->ymax);
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalise higher reference indices */
1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* cache the per-reference result when the cache arrays exist */
1852 if(s->ref_mvs[ref]){
1853 s->ref_mvs[ref][index][0]= ref_mx;
1854 s->ref_mvs[ref][index][1]= ref_my;
1855 s->ref_scores[ref][index]= ref_score;
1857 if(score > ref_score){
1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
/* bit count of the real coder before the trials; subtracted below so
   only the bits added by each trial are charged */
1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* ---- inter trial encoding ---- */
1869 pc.bytestream_start=
1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871 memcpy(p_state, s->block_state, sizeof(s->block_state));
1873 if(level!=s->block_max_depth)
1874 put_rac(&pc, &p_state[4 + s_context], 1);
1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876 if(s->ref_frames > 1)
1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881 p_len= pc.bytestream - pc.bytestream_start;
1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
/* ---- intra trial: l is the rounded mean of the luma block; iscore is
   pix_norm1 - 2*l*sum + l*l*block_s (assuming pix_norm1 returns the sum
   of squared pixels, this is the squared error of a flat block of
   value l) ---- */
1884 block_s= block_w*block_w;
1885 sum = pix_sum(current_data[0], stride, block_w);
1886 l= (sum + block_s/2)/block_s;
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* chroma means over the half-size chroma blocks */
1889 block_s= block_w*block_w>>2;
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891 cb= (sum + block_s/2)/block_s;
1892 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894 cr= (sum + block_s/2)/block_s;
1895 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1898 ic.bytestream_start=
1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900 memcpy(i_state, s->block_state, sizeof(s->block_state));
1901 if(level!=s->block_max_depth)
1902 put_rac(&ic, &i_state[4 + s_context], 1);
1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904 put_symbol(&ic, &i_state[32], l-pl , 1);
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907 i_len= ic.bytestream - ic.bytestream_start;
1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1910 // assert(score==256*256*256*64-1);
1911 assert(iscore < 255*255*256 + s->lambda2*10);
1912 assert(iscore >= 0);
1913 assert(l>=0 && l<=255);
1914 assert(pl>=0 && pl<=255);
/* scene change statistics from the intra and inter scores */
1917 int varc= iscore >> 8;
1918 int vard= score >> 8;
1919 if (vard <= 64 || vard < varc)
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1922 c->scene_change_score+= s->m.qscale;
/* ---- try splitting into four children ---- */
1925 if(level!=s->block_max_depth){
1926 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1933 if(score2 < score && score2 < iscore)
/* ---- commit intra: copy the intra trial bytes and state back ---- */
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939 memcpy(pbbak, i_buffer, i_len);
1941 s->c.bytestream_start= pbbak_start;
1942 s->c.bytestream= pbbak + i_len;
1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* ---- commit inter: copy the inter trial bytes and state back ---- */
1947 memcpy(pbbak, p_buffer, p_len);
1949 s->c.bytestream_start= pbbak_start;
1950 s->c.bytestream= pbbak + p_len;
1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Returns non-zero when blocks a and b carry the same coding decision.
 * If both are intra, the three colour components must match; otherwise
 * mx, my, ref and the BLOCK_INTRA bit of type must all match.
 * The differences are OR-ed together so that a single test against zero
 * covers every field.
 */
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the already-decided contents of quadtree node (x, y) at `level`
 * to the range coder s->c, without any motion search.
 * Below block_max_depth a flag is written first: 1 when the block and
 * its right, lower and lower-right neighbours in s->block are identical
 * (same_block), otherwise 0 followed by the four child nodes.
 * For a coded node, intra blocks write the three colour differences
 * against the left neighbour; inter blocks write the reference index
 * (only when more than one reference frame is in use) and the motion
 * vector differences against pred_mv().
 * NOTE(review): the declarations of pmx/pmy, an early-exit branch and the
 * `else`/`return` lines are not visible in this excerpt.
 */
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966 const int w= s->b_width << s->block_max_depth;
1967 const int rem_depth= s->block_max_depth - level;
1968 const int index= (x + y*w) << rem_depth;
1969 int trx= (x+1)<<rem_depth;
1970 BlockNode *b= &s->block[index];
/* neighbours; null_block stands in at picture borders */
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour predictors come from the left neighbour */
1975 int pl = left->color[0];
1976 int pcb= left->color[1];
1977 int pcr= left->color[2];
1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* a separate set of mv contexts (offset 16) is used when ref is non-zero */
1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1989 if(level!=s->block_max_depth){
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
/* 1 = this node is coded as a single block */
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1);
/* 0 = split into four children */
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2001 if(b->type & BLOCK_INTRA){
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011 if(s->ref_frames > 1)
2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Reads quadtree node (x, y) at `level` from the range coder s->c and
 * stores the result with set_blocks().
 * At block_max_depth, or when the leaf flag read from the coder is set,
 * the node is a single block: its type is read, then either the three
 * colour differences (intra; added to the left neighbour's colours) or
 * the reference index and motion vector differences (inter; added to the
 * pred_mv() prediction). Otherwise the four child nodes are decoded
 * recursively.
 * NOTE(review): the declarations of type/mx/my/ref, an early-exit branch
 * and the `if`/`else` lines separating the intra and inter paths are not
 * visible in this excerpt.
 */
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2020 const int w= s->b_width << s->block_max_depth;
2021 const int rem_depth= s->block_max_depth - level;
2022 const int index= (x + y*w) << rem_depth;
2023 int trx= (x+1)<<rem_depth;
/* neighbours; null_block stands in at picture borders */
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* the leaf flag is only present in the stream below block_max_depth */
2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* colour predictors come from the left neighbour */
2037 int l = left->color[0];
2038 int cb= left->color[1];
2039 int cr= left->color[2];
2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the tr terms are multiplied by 0 and therefore do not contribute */
2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* intra path: colour differences */
2048 pred_mv(s, &mx, &my, 0, left, top, tr);
2049 l += get_symbol(&s->c, &s->block_state[32], 1);
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* inter path: reference index (if more than one frame) and mv differences */
2053 if(s->ref_frames > 1)
2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2055 pred_mv(s, &mx, &my, ref, left, top, tr);
2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* split node: decode the four children */
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Write the block tree of the current frame to the range coder.
 *
 * @param s      codec context
 * @param search non-zero selects encode_q_branch() unless me_method is
 *               ME_ITER; zero always selects encode_q_branch2()
 *
 * Logs "encoded frame too large" when the space left in the output buffer
 * drops below w*MB_SIZE*MB_SIZE*3 bytes.
 */
2068 static void encode_blocks(SnowContext *s, int search){
// NOTE(review): the statement controlled by this if is not visible here
// (presumably the iterative motion estimation pass) -- confirm
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2082 if(s->avctx->me_method == ME_ITER || !search)
2083 encode_q_branch2(s, 0, x, y);
2085 encode_q_branch (s, 0, x, y);
/**
 * Decode the block tree of the current frame by calling decode_q_branch()
 * at level 0 for each (x, y).
 * NOTE(review): the loops producing x and y are not visible here --
 * presumably all root blocks in raster order; confirm.
 */
2090 static void decode_blocks(SnowContext *s){
2097 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel interpolation of one b_w x b_h block from src into dst.
 *
 * @param p      plane supplying hcoeff, fast_mc and diag_mc; NULL selects
 *               the fixed (20,-5,1) kernel
 * @param dst    destination block
 * @param src    source pixels, starting above/left of the block so that
 *               the filter taps are available
 * @param tmp    scratch pointer
 * @param stride line size used for src and the temporary planes
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pixel position, asserted to be < 16
 * @param dy     vertical sub-pixel position, asserted to be < 16
 *
 * The visible code builds filtered planes (horizontal, vertical and
 * horizontal-then-vertical), collects pointers to them in hpel[] and
 * finally blends either four or two of those planes into dst.
 */
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// blend weights, indexed by (dx&7) + 8*(dy&7)
2103 static const uint8_t weight[64]={
// indexed by dx + 16*dy; the low nibble becomes r and the high nibble l,
// both used as indices into hpel[]
2114 static const uint8_t brane[256]={
2115 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2116 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2117 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2118 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2119 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2120 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2121 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2122 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2123 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2124 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2125 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2126 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2127 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2128 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2129 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2130 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
// indexed by an hpel[] plane number; the entries for l and r are OR-ed into b
2133 static const uint8_t needs[16]={
// 16-bit intermediate for the two-pass filter, fixed row pitch of 64
2141 int16_t tmpIt [64*(32+HTAPS_MAX)];
// three 8-bit temporary planes (variable-length array sized by stride)
2142 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2143 int16_t *tmpI= tmpIt;
2144 uint8_t *tmp2= tmp2t[0];
2145 const uint8_t *hpel[11];
2146 assert(dx<16 && dy<16);
2147 r= brane[dx + 16*dy]&15;
2148 l= brane[dx + 16*dy]>>4;
2150 b= needs[l] | needs[r];
// NOTE(review): the statement controlled by this if is not visible here
2151 if(p && !p->diag_mc)
// horizontal pass over b_h+HTAPS_MAX-1 rows, reading 8 neighbouring pixels
2155 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2156 for(x=0; x < b_w; x++){
2157 int a_1=src[x + HTAPS_MAX/2-4];
2158 int a0= src[x + HTAPS_MAX/2-3];
2159 int a1= src[x + HTAPS_MAX/2-2];
2160 int a2= src[x + HTAPS_MAX/2-1];
2161 int a3= src[x + HTAPS_MAX/2+0];
2162 int a4= src[x + HTAPS_MAX/2+1];
2163 int a5= src[x + HTAPS_MAX/2+2];
2164 int a6= src[x + HTAPS_MAX/2+3];
2166 if(!p || p->fast_mc){
// fixed symmetric 6-tap kernel, unnormalised at this point
2167 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
// symmetric 8-tap kernel taken from the plane
2171 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
// clip idiom: a negative value becomes 0, a value above 255 becomes ~0
// (255 once truncated to 8 bits)
2176 if(am&(~255)) am= ~(am>>31);
2185 src += HTAPS_MAX/2 - 1;
// vertical pass: same taps, stepped by stride, over b_w+1 columns
2189 for(y=0; y < b_h; y++){
2190 for(x=0; x < b_w+1; x++){
2191 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2192 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2193 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2194 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2195 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2196 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2197 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2198 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2200 if(!p || p->fast_mc)
2201 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2203 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2205 if(am&(~255)) am= ~(am>>31);
2213 src += stride*(HTAPS_MAX/2 - 1);
// second pass over the 16-bit intermediate tmpI (row pitch 64); the
// rounding and shift are twice those of the single vertical pass
2217 for(y=0; y < b_h; y++){
2218 for(x=0; x < b_w; x++){
2219 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2220 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2221 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2222 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2223 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2224 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2225 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2226 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2228 if(!p || p->fast_mc)
2229 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2231 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2232 if(am&(~255)) am= ~(am>>31);
// table of plane pointers; only some of the assignments are visible here
2241 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2246 hpel[ 6]= tmp2t[1] + 1;
2248 hpel[ 8]= src + stride;
2249 hpel[ 9]= hpel[1] + stride;
2250 hpel[10]= hpel[8] + 1;
// four-plane blend: planes chosen by dx/8 and dy/8, bilinear weights from
// dx and dy, +32 rounding and >>6
// NOTE(review): the weights only sum to 64 if dx and dy are below 8 here;
// any reduction of dx/dy is not visible -- confirm
2253 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2254 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2255 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2256 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2259 for(y=0; y < b_h; y++){
2260 for(x=0; x < b_w; x++){
2261 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2262 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
// two-plane blend of hpel[l] and hpel[r] with weight a
// NOTE(review): b is used as the second weight below; its reassignment is
// not visible here (presumably 8-a) -- confirm
2271 const uint8_t *src1= hpel[l];
2272 const uint8_t *src2= hpel[r];
2273 int a= weight[((dx&7) + (8*(dy&7)))];
2275 for(y=0; y < b_h; y++){
2276 for(x=0; x < b_w; x++){
2277 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h),
 * a wrapper that calls mc_block() with a NULL plane and a square b_w x b_w
 * block. src is moved back by HTAPS_MAX/2-1 pixels and HTAPS_MAX/2-1 rows
 * before the call; tmp is a variable-length scratch array of
 * stride*(b_w+HTAPS_MAX-1) bytes. */
2286 #define mca(dx,dy,b_w)\
2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2288 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2290 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_h, dx, dy);\
/**
 * Produce the prediction of a single block into dst.
 *
 * @param s           codec context (reference pictures, mv_scale, dsp)
 * @param dst         output, b_w x b_h pixels with line size stride
 * @param tmp         scratch used for edge emulation and by mc_block()
 * @param stride      line size of dst and of the reference plane
 * @param sx          horizontal position of the block in the plane
 * @param sy          vertical position of the block in the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block parameters (type, color, mx, my, ref)
 * @param plane_index plane to predict; selects color[] entry and mv scale
 * @param w           plane width
 * @param h           plane height
 *
 * Intra blocks are filled with the constant block->color[plane_index];
 * other blocks are motion compensated from s->last_picture[block->ref].
 */
2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2303 if(block->type & BLOCK_INTRA){
2305 const int color = block->color[plane_index];
// the colour byte replicated into all four bytes of a 32-bit word
2306 const int color4= color*0x01010101;
// unrolled fills of 32, 16, 8 and 4 bytes per row
// NOTE(review): the b_w tests selecting each variant are not visible here
2308 for(y=0; y < b_h; y++){
2309 *(uint32_t*)&dst[0 + y*stride]= color4;
2310 *(uint32_t*)&dst[4 + y*stride]= color4;
2311 *(uint32_t*)&dst[8 + y*stride]= color4;
2312 *(uint32_t*)&dst[12+ y*stride]= color4;
2313 *(uint32_t*)&dst[16+ y*stride]= color4;
2314 *(uint32_t*)&dst[20+ y*stride]= color4;
2315 *(uint32_t*)&dst[24+ y*stride]= color4;
2316 *(uint32_t*)&dst[28+ y*stride]= color4;
2319 for(y=0; y < b_h; y++){
2320 *(uint32_t*)&dst[0 + y*stride]= color4;
2321 *(uint32_t*)&dst[4 + y*stride]= color4;
2322 *(uint32_t*)&dst[8 + y*stride]= color4;
2323 *(uint32_t*)&dst[12+ y*stride]= color4;
2326 for(y=0; y < b_h; y++){
2327 *(uint32_t*)&dst[0 + y*stride]= color4;
2328 *(uint32_t*)&dst[4 + y*stride]= color4;
2331 for(y=0; y < b_h; y++){
2332 *(uint32_t*)&dst[0 + y*stride]= color4;
// generic byte-wise fill
2335 for(y=0; y < b_h; y++){
2336 for(x=0; x < b_w; x++){
2337 dst[x + y*stride]= color;
2342 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// plane 0 uses twice the scale of the other planes
2343 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2344 int mx= block->mx*scale;
2345 int my= block->my*scale;
// low 4 bits are the sub-pixel phase, the rest the whole-pixel offset
2346 const int dx= mx&15;
2347 const int dy= my&15;
2348 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// also step back by the filter margin so the taps have their context
2349 sx += (mx>>4) - (HTAPS_MAX/2-1);
2350 sy += (my>>4) - (HTAPS_MAX/2-1);
2351 src += sx + sy*stride;
// the unsigned compare also catches negative sx/sy; when the block plus
// filter margin leaves the plane, an edge-extended copy is built in tmp
2352 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2353 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2354 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2357 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2358 // assert(!(b_w&(b_w-1)));
2359 assert(b_w>1 && b_h>1);
2360 assert((tab_index>=0 && tab_index<4) || b_w==32);
// mc_block() handles every case the qpel functions below cannot: phases
// that are not multiples of 4, unsupported shapes, non-power-of-two
// widths, or planes without fast_mc
2361 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2362 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
// two 16-wide calls per 16 rows
// NOTE(review): the branch condition is not visible (presumably b_w==32)
2365 for(y=0; y<b_h; y+=16){
2366 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2367 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
// single call
// NOTE(review): the branch condition is not visible (presumably b_w==b_h)
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
// wide block: two calls side by side
2371 else if(b_w==2*b_h){
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
// remaining shape: two calls stacked vertically
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Blend four block predictions with the OBMC window and combine the result
 * with the rows of a slice buffer.
 *
 * @param obmc        OBMC weight window
 * @param obmc_stride line size of the window, also used as its height
 * @param block       four prediction pointers, block[0]..block[3]
 * @param b_w         width of the region
 * @param b_h         height of the region
 * @param src_x       horizontal offset into each slice buffer row
 * @param src_y       first slice buffer row
 * @param src_stride  line size of the predictions and of dst8
 * @param sb          slice buffer providing the rows
 * @param add         selects between the two paths below
 *                    NOTE(review): the test itself is not visible here
 * @param dst8        8-bit output written on the first path
 */
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2383 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2386 for(y=0; y<b_h; y++){
2387 //FIXME ugly misuse of obmc_stride
// obmc1..obmc4 address the same row in the four quadrants of the window:
// half a line to the right and/or obmc_stride/2 lines down
2388 const uint8_t *obmc1= obmc + y*obmc_stride;
2389 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2390 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2391 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2392 dst = slice_buffer_get_line(sb, src_y + y);
2393 for(x=0; x<b_w; x++){
// note the reversed pairing: obmc1 weights block[3], obmc4 weights block[0]
2394 int v= obmc1[x] * block[3][x + y*src_stride]
2395 +obmc2[x] * block[2][x + y*src_stride]
2396 +obmc3[x] * block[1][x + y*src_stride]
2397 +obmc4[x] * block[0][x + y*src_stride];
2399 v <<= 8 - LOG2_OBMC_MAX;
2401 v >>= 8 - FRAC_BITS;
// first path: add the buffered value, round, clip and store 8-bit
2404 v += dst[x + src_x];
2405 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2406 if(v&(~255)) v= ~(v>>31);
2407 dst8[x + y*src_stride] = v;
// second path: subtract the prediction from the buffered value
2409 dst[x + src_x] -= v;
2415 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predict one OBMC region from the four blocks around it and add the
 * prediction to, or subtract it from, the destination.
 *
 * The region at (src_x, src_y) is covered by the blocks lt, rt, lb and rb
 * starting at block position (b_x, b_y). Each is predicted with
 * pred_block(), and the four predictions are weighted with the quadrants
 * of the obmc window.
 *
 * @param s           codec context
 * @param sliced      non-zero when working on the slice buffer sb
 * @param sb          slice buffer, passed to s->dsp.inner_add_yblock()
 * @param dst         IDWTELEM destination for the non-sliced paths
 * @param dst8        8-bit picture output
 * @param obmc        OBMC weight window
 * @param src_x       horizontal position of the region in the plane
 * @param src_y       vertical position of the region in the plane
 * @param b_w         region width (clipped to the plane in the body)
 * @param b_h         region height (clipped to the plane in the body)
 * @param w           plane width
 * @param h           plane height
 * @param dst_stride  line size of dst
 * @param src_stride  line size of dst8 and of the predictions
 * @param obmc_stride line size of the obmc window
 * @param b_x         block column of lt
 * @param b_y         block row of lt
 * @param add         non-zero adds the prediction, zero subtracts it
 * @param offset_dst  non-zero: dst is advanced to the region before use
 * @param plane_index plane being processed
 */
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2417 const int b_width = s->b_width << s->block_max_depth;
2418 const int b_height= s->b_height << s->block_max_depth;
2419 const int b_stride= b_width;
// the four blocks overlapping this region
2420 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2421 BlockNode *rt= lt+1;
2422 BlockNode *lb= lt+b_stride;
2423 BlockNode *rb= lb+1;
// spacing of the prediction buffers inside the scratch area: side by side
// when a line is wide enough, otherwise MB_SIZE lines apart
2425 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2426 uint8_t *tmp = s->scratchbuf;
// picture border handling
// NOTE(review): the bodies of these branches are not visible here
// (presumably they substitute neighbours that fall outside) -- confirm
2433 }else if(b_x + 1 >= b_width){
2440 }else if(b_y + 1 >= b_height){
// clip the region against the plane edges
// NOTE(review): most of the adjusting statements are not visible here
2445 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2448 if(!sliced && !offset_dst)
2451 }else if(src_x + b_w > w){
2455 obmc -= src_y*obmc_stride;
2457 if(!sliced && !offset_dst)
2458 dst -= src_y*dst_stride;
2460 }else if(src_y + b_h> h){
// nothing left after clipping
2464 if(b_w<=0 || b_h<=0) return;
2466 assert(src_stride > 2*MB_SIZE + 5);
2468 if(!sliced && offset_dst)
2469 dst += src_x + src_y*dst_stride;
2470 dst8+= src_x + src_y*src_stride;
2471 // src += src_x + src_y*src_stride;
2473 ptmp= tmp + 3*tmp_step;
// predict each of the four blocks
// NOTE(review): the same_block() tests presumably reuse an identical
// neighbour's prediction; the reuse statements are not visible -- confirm
2476 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2478 if(same_block(lt, rt)){
2483 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2486 if(same_block(lt, lb)){
2488 }else if(same_block(rt, lb)){
2493 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2496 if(same_block(lt, rb) ){
2498 }else if(same_block(rt, rb)){
2500 }else if(same_block(lb, rb)){
2504 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// variant that applies each block's contribution in its own pass
// NOTE(review): the condition selecting this variant is not visible here
2507 for(y=0; y<b_h; y++){
2508 for(x=0; x<b_w; x++){
2509 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2510 if(add) dst[x + y*dst_stride] += v;
2511 else dst[x + y*dst_stride] -= v;
2514 for(y=0; y<b_h; y++){
2515 uint8_t *obmc2= obmc + (obmc_stride>>1);
2516 for(x=0; x<b_w; x++){
2517 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2518 if(add) dst[x + y*dst_stride] += v;
2519 else dst[x + y*dst_stride] -= v;
2522 for(y=0; y<b_h; y++){
2523 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2524 for(x=0; x<b_w; x++){
2525 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2526 if(add) dst[x + y*dst_stride] += v;
2527 else dst[x + y*dst_stride] -= v;
2530 for(y=0; y<b_h; y++){
2531 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2532 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533 for(x=0; x<b_w; x++){
2534 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2535 if(add) dst[x + y*dst_stride] += v;
2536 else dst[x + y*dst_stride] -= v;
// slice buffer variant, done by the dsp function
// NOTE(review): presumably taken when sliced is set -- confirm
2541 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
// plain buffer variant: all four weighted predictions are summed first
2543 for(y=0; y<b_h; y++){
2544 //FIXME ugly misuse of obmc_stride
2545 const uint8_t *obmc1= obmc + y*obmc_stride;
2546 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2547 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2548 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2549 for(x=0; x<b_w; x++){
2550 int v= obmc1[x] * block[3][x + y*src_stride]
2551 +obmc2[x] * block[2][x + y*src_stride]
2552 +obmc3[x] * block[1][x + y*src_stride]
2553 +obmc4[x] * block[0][x + y*src_stride];
2555 v <<= 8 - LOG2_OBMC_MAX;
2557 v >>= 8 - FRAC_BITS;
// one path adds the residual in dst, rounds, clips and stores to dst8
2560 v += dst[x + y*dst_stride];
2561 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2562 if(v&(~255)) v= ~(v>>31);
2563 dst8[x + y*src_stride] = v;
// the other path subtracts the prediction from dst
2565 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction to one row of blocks of a plane held in a slice
 * buffer.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the IDWTELEM rows
 * @param old_buffer  passed to add_yblock() as its dst argument
 * @param plane_index plane being processed
 * @param add         forwarded to add_yblock(); also selects which of the
 *                    two keyframe loops runs
 *                    NOTE(review): that test is not visible here
 * @param mb_y        block row; rows block_w*mb_y .. block_w*(mb_y+1)-1,
 *                    limited to h, are touched in the keyframe path
 *
 * When s->keyframe is set or debug bit 512 is on, no motion prediction is
 * used and the constant 128 (scaled by FRAC_BITS) is added or subtracted.
 */
2573 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2574 Plane *p= &s->plane[plane_index];
2575 const int mb_w= s->b_width << s->block_max_depth;
2576 const int mb_h= s->b_height << s->block_max_depth;
2578 int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block size
2579 int block_w = plane_index ? block_size/2 : block_size;
2580 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2581 int obmc_stride= plane_index ? block_size : 2*block_size;
2582 int ref_stride= s->current_picture.linesize[plane_index];
2583 uint8_t *dst8= s->current_picture.data[plane_index];
2587 if(s->keyframe || (s->avctx->debug&512)){
// add the 128 bias plus rounding constant, clip, store to the picture
2592 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2593 // DWTELEM * line = slice_buffer_get_line(sb, y);
2594 IDWTELEM * line = sb->line[y];
2596 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2597 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2599 if(v&(~255)) v= ~(v>>31);
2600 dst8[x + y*ref_stride]= v;
// remove the 128 bias from the buffered rows
2604 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2605 // DWTELEM * line = slice_buffer_get_line(sb, y);
2606 IDWTELEM * line = sb->line[y];
2608 line[x] -= 128 << FRAC_BITS;
2609 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive because each region is shifted by
// -block_w/2, so one more region is needed to reach the right edge
2617 for(mb_x=0; mb_x<=mb_w; mb_x++){
2618 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2619 block_w*mb_x - block_w/2,
2620 block_w*mb_y - block_w/2,
2623 w, ref_stride, obmc_stride,
2625 add, 0, plane_index);
/**
 * Apply the prediction to one row of blocks of a plane held in a flat
 * IDWTELEM buffer.
 *
 * @param s           codec context
 * @param buf         plane buffer, indexed as buf[x + y*w]
 * @param plane_index plane being processed
 * @param add         forwarded to add_yblock(); also selects which of the
 *                    two keyframe loops runs
 *                    NOTE(review): that test is not visible here
 * @param mb_y        block row to process
 *
 * When s->keyframe is set or debug bit 512 is on, no motion prediction is
 * used and the constant 128 (scaled by FRAC_BITS) is added or subtracted.
 */
2629 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2630 Plane *p= &s->plane[plane_index];
2631 const int mb_w= s->b_width << s->block_max_depth;
2632 const int mb_h= s->b_height << s->block_max_depth;
2634 int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block size
2635 int block_w = plane_index ? block_size/2 : block_size;
2636 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2637 const int obmc_stride= plane_index ? block_size : 2*block_size;
2638 int ref_stride= s->current_picture.linesize[plane_index];
2639 uint8_t *dst8= s->current_picture.data[plane_index];
2643 if(s->keyframe || (s->avctx->debug&512)){
// add the 128 bias plus rounding constant, clip, store to the picture
2648 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2650 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2652 if(v&(~255)) v= ~(v>>31);
2653 dst8[x + y*ref_stride]= v;
// remove the 128 bias from the buffer
2657 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2659 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive because each region is shifted by -block_w/2
2667 for(mb_x=0; mb_x<=mb_w; mb_x++){
2668 add_yblock(s, 0, NULL, buf, dst8, obmc,
2669 block_w*mb_x - block_w/2,
2670 block_w*mb_y - block_w/2,
2673 w, ref_stride, obmc_stride,
2675 add, 1, plane_index);
/**
 * Apply the prediction to a whole plane by calling predict_slice() for
 * every block row, from 0 to mb_h inclusive.
 *
 * @param s           codec context
 * @param buf         plane buffer passed through to predict_slice()
 * @param plane_index plane being processed
 * @param add         passed through to predict_slice()
 */
2679 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2680 const int mb_h= s->b_height << s->block_max_depth;
2682 for(mb_y=0; mb_y<=mb_h; mb_y++)
2683 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Estimate the intra colour for one block in one plane.
 *
 * The block is temporarily made intra with colour 0, so that the OBMC
 * prediction built in the scratch buffer holds only what the neighbouring
 * blocks contribute. The colour is then the OBMC-weighted fit of the
 * remaining difference to the input picture.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @return estimated colour, clipped to 0..255
 */
2686 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2688 Plane *p= &s->plane[plane_index];
2689 const int block_size = MB_SIZE >> s->block_max_depth;
2690 const int block_w = plane_index ? block_size/2 : block_size;
2691 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2692 const int obmc_stride= plane_index ? block_size : 2*block_size;
2693 const int ref_stride= s->current_picture.linesize[plane_index];
2694 uint8_t *src= s-> input_picture.data[plane_index];
2695 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2696 const int b_stride = s->b_width << s->block_max_depth;
2697 const int w= p->width;
2698 const int h= p->height;
2699 int index= mb_x + mb_y*b_stride;
2700 BlockNode *b= &s->block[index];
// NOTE(review): the statement restoring *b from backup is not visible here
2701 BlockNode backup= *b;
2705 b->type|= BLOCK_INTRA;
2706 b->color[plane_index]= 0;
2707 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// i selects one of the four regions overlapping this block:
// bit 0 = right half, bit 1 = lower half
2710 int mb_x2= mb_x + (i &1) - 1;
2711 int mb_y2= mb_y + (i>>1) - 1;
2712 int x= block_w*mb_x2 + block_w/2;
2713 int y= block_w*mb_y2 + block_w/2;
// add=0: the prediction is subtracted from the zeroed scratch buffer
2715 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2716 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2718 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2719 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
// position inside this block's own OBMC window
2720 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2721 int obmc_v= obmc[index];
// at picture borders, add the window weight from the missing side
2723 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2724 if(x<0) obmc_v += obmc[index + block_w];
2725 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2726 if(x+block_w>w) obmc_v += obmc[index - block_w];
2727 //FIXME precalculate this or simplify it somehow else
// dst holds the negated prediction, so negate again and add the rounding term
2729 d = -dst[index] + (1<<(FRAC_BITS-1));
// ab accumulates weight*difference, aa accumulates weight^2
2731 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2732 aa += obmc_v * obmc_v; //FIXME precalculate this
// rounded quotient ab/aa, scaled by the OBMC maximum
2738 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Rough estimate of the number of bits needed to code one block.
 *
 * Intra blocks are costed from the colour differences to the left
 * neighbour. Other blocks are costed from dmx, dmy and the reference
 * index.
 * NOTE(review): dmx/dmy are filled by pred_mv(); any further adjustment
 * before the return (e.g. subtracting the block's own mv) is not visible
 * here -- confirm.
 *
 * @param s codec context
 * @param x block column
 * @param y block row
 * @param w block width in block units, used to locate the top-right neighbour
 * @return estimated cost; the value returned for positions outside the
 *         block array is not visible here
 */
2741 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2742 const int b_stride = s->b_width << s->block_max_depth;
2743 const int b_height = s->b_height<< s->block_max_depth;
2744 int index= x + y*b_stride;
2745 const BlockNode *b = &s->block[index];
// neighbours that do not exist fall back to null_block or to another neighbour
2746 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2747 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2748 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2749 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2751 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2752 // int my_context= av_log2(2*FFABS(left->my - top->my));
// positions outside the block array
2754 if(x<0 || x>=b_stride || y>=b_height)
2761 00001XXXX 15-30 8-15
2763 //FIXME try accurate rate
2764 //FIXME intra and inter predictors if surrounding blocks are not the same type
2765 if(b->type & BLOCK_INTRA){
// 3 plus twice the log2 magnitude of each colour difference
2766 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2767 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2768 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2770 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2773 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2774 + av_log2(2*FFABS(dmy))
2775 + av_log2(2*b->ref));
/**
 * Rate-distortion score of one block with its current parameters.
 *
 * The block is predicted over its full OBMC support (2*block_w squared),
 * blended with the other blocks' contribution found in
 * s->m.obmc_scratchpad, and compared against the input picture. The rate
 * term comes from get_block_bits() for this block and some neighbours.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  OBMC window already adjusted for picture borders
 * @return distortion + rate*penalty_factor
 */
2779 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2780 Plane *p= &s->plane[plane_index];
2781 const int block_size = MB_SIZE >> s->block_max_depth;
2782 const int block_w = plane_index ? block_size/2 : block_size;
2783 const int obmc_stride= plane_index ? block_size : 2*block_size;
2784 const int ref_stride= s->current_picture.linesize[plane_index];
2785 uint8_t *dst= s->current_picture.data[plane_index];
2786 uint8_t *src= s-> input_picture.data[plane_index];
2787 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2788 uint8_t *cur = s->scratchbuf;
// variable-length scratch for pred_block(), sized by the line size
2789 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2790 const int b_stride = s->b_width << s->block_max_depth;
2791 const int b_height = s->b_height<< s->block_max_depth;
2792 const int w= p->width;
2793 const int h= p->height;
2796 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// top-left corner of the OBMC support, and the part of it inside the plane
2797 int sx= block_w*mb_x - block_w/2;
2798 int sy= block_w*mb_y - block_w/2;
2799 int x0= FFMAX(0,-sx);
2800 int y0= FFMAX(0,-sy);
2801 int x1= FFMIN(block_w*2, w-sx);
2802 int y1= FFMIN(block_w*2, h-sy);
2805 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
// weight this block's prediction, add the stored contribution, clip
// NOTE(review): the store of v (presumably into dst1[x]) is not visible here
2807 for(y=y0; y<y1; y++){
2808 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2809 const IDWTELEM *pred1 = pred + y*obmc_stride;
2810 uint8_t *cur1 = cur + y*ref_stride;
2811 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2812 for(x=x0; x<x1; x++){
2813 #if FRAC_BITS >= LOG2_OBMC_MAX
2814 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2816 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2818 v = (v + pred1[x]) >> FRAC_BITS;
2819 if(v&(~255)) v= ~(v>>31);
2824 /* copy the regions where obmc[] = (uint8_t)256 */
// only corner blocks are affected
2825 if(LOG2_OBMC_MAX == 8
2826 && (mb_x == 0 || mb_x == b_stride-1)
2827 && (mb_y == 0 || mb_y == b_height-1)){
2836 for(y=y0; y<y1; y++)
2837 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2841 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2842 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2843 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2844 * So improving the score of one block is not strictly guaranteed
2845 * to improve the score of the whole frame, thus iterative motion
2846 * estimation does not always converge. */
// wavelet comparison functions take the 32x32 area in one call
2847 if(s->avctx->me_cmp == FF_CMP_W97)
2848 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2849 else if(s->avctx->me_cmp == FF_CMP_W53)
2850 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// other comparison functions: sum over 16x16 quadrants
2854 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2855 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
// single call covering the whole support
// NOTE(review): the condition selecting this call is not visible here
2860 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// rate of this block and of neighbouring blocks
2869 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2871 if(mb_x == b_stride-2)
2872 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2874 return distortion + rate*penalty_factor;
/**
 * Rate-distortion score of a group of four blocks whose top-left block is
 * at (mb_x, mb_y).
 *
 * Regions around the group are rebuilt with add_yblock() into the current
 * picture and compared with the input picture. The rate term adds
 * get_block_bits() for the group and for neighbouring blocks.
 *
 * @param s           codec context
 * @param mb_x        block column of the top-left block
 * @param mb_y        block row of the top-left block
 * @param plane_index plane to evaluate
 * @return distortion + rate*penalty_factor
 */
2877 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2879 Plane *p= &s->plane[plane_index];
2880 const int block_size = MB_SIZE >> s->block_max_depth;
2881 const int block_w = plane_index ? block_size/2 : block_size;
2882 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2883 const int obmc_stride= plane_index ? block_size : 2*block_size;
2884 const int ref_stride= s->current_picture.linesize[plane_index];
2885 uint8_t *dst= s->current_picture.data[plane_index];
2886 uint8_t *src= s-> input_picture.data[plane_index];
2887 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2888 // const has only been removed from zero_dst to suppress a warning
2889 static IDWTELEM zero_dst[4096]; //FIXME
2890 const int b_stride = s->b_width << s->block_max_depth;
2891 const int w= p->width;
2892 const int h= p->height;
2895 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i%3 and i/3 walk a 3x3 grid of regions starting one block up and left
// NOTE(review): the loop header is not visible (presumably i = 0..8)
2898 int mb_x2= mb_x + (i%3) - 1;
2899 int mb_y2= mb_y + (i/3) - 1;
2900 int x= block_w*mb_x2 + block_w/2;
2901 int y= block_w*mb_y2 + block_w/2;
// zero residual with dst_stride 0 and add=1: the result in dst is the
// prediction alone
2903 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2904 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2906 //FIXME find a cleaner/simpler way to skip the outside stuff
// copy the input over the parts outside the plane so that they add no
// distortion
2907 for(y2= y; y2<0; y2++)
2908 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2909 for(y2= h; y2<y+block_w; y2++)
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2912 for(y2= y; y2<y+block_w; y2++)
2913 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2916 for(y2= y; y2<y+block_w; y2++)
2917 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[1] is used for block_w 8, me_cmp[0] for block_w 16
2920 assert(block_w== 8 || block_w==16);
2921 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2925 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
// all four blocks of the group are identical
2926 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2934 rate = get_block_bits(s, mb_x, mb_y, 2);
// when merged, the first four entries (the group itself) are skipped
2935 for(i=merged?4:0; i<9; i++){
2936 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2937 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2940 return distortion + rate*penalty_factor;
/**
 * Try a candidate for one block and score it with get_block_rd().
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p          candidate values: three colours on the intra path,
 *                   p[0] and p[1] on the inter path
 * @param intra      selects the intra or inter path
 *                   NOTE(review): the branch on it is not visible here
 * @param obmc_edged OBMC window passed on to get_block_rd()
 * @param best_rd    best score so far
 *                   NOTE(review): the comparison, the update and the
 *                   restore from backup are not visible here
 * @return NOTE(review): the return statements are not visible here
 *         (presumably non-zero when the candidate was kept) -- confirm
 */
2943 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2944 const int b_stride= s->b_width << s->block_max_depth;
2945 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2946 BlockNode backup= *block;
2947 int rd, index, value;
2949 assert(mb_x>=0 && mb_y>=0);
2950 assert(mb_x<b_stride);
// intra candidate: install the three colours
2953 block->color[0] = p[0];
2954 block->color[1] = p[1];
2955 block->color[2] = p[2];
2956 block->type |= BLOCK_INTRA;
// inter candidate: look it up in s->me_cache; the stored value combines
// the cache generation with p[0], p[1] and block->ref
2958 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2959 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
// a matching entry means this candidate was seen before
// NOTE(review): the action taken on a match is not visible here
2960 if(s->me_cache[index] == value)
2962 s->me_cache[index]= value;
2966 block->type &= ~BLOCK_INTRA;
// only plane 0 is scored
2969 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2981 /* special case for int[2] args we discard afterwards,
2982 * fixes compilation problem with gcc 2.95 */
// Packs p0 and p1 into a local array and calls check_block() with intra=0;
// the result of check_block() is returned unchanged.
2983 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2984 int p[2] = {p0, p1};
2985 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one inter candidate for a whole 2x2 group of blocks and score it
 * with get_4block_rd().
 *
 * @param s       codec context
 * @param mb_x    block column of the top-left block, asserted to be even
 * @param mb_y    block row of the top-left block, asserted to be even
 * @param p0      first candidate value
 * @param p1      second candidate value
 * @param ref     reference index of the candidate
 *                NOTE(review): the statements storing p0, p1 and ref into
 *                the block are not visible here
 * @param best_rd best score so far
 *                NOTE(review): the comparison and update are not visible
 * @return NOTE(review): the return statements are not visible here
 */
2988 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2989 const int b_stride= s->b_width << s->block_max_depth;
2990 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// saved copies of the four blocks, in the order they are restored below
2991 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2992 int rd, index, value;
2994 assert(mb_x>=0 && mb_y>=0);
2995 assert(mb_x<b_stride);
2996 assert(((mb_x|mb_y)&1) == 0);
// same cache scheme as check_block()
2998 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2999 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3000 if(s->me_cache[index] == value)
3002 s->me_cache[index]= value;
3007 block->type &= ~BLOCK_INTRA;
// copy the top-left block into the other three of the group
3008 block[1]= block[b_stride]= block[b_stride+1]= *block;
// only plane 0 is scored
3010 rd= get_4block_rd(s, mb_x, mb_y, 0);
// put back the saved blocks
// NOTE(review): presumably only when the candidate is rejected -- confirm
3017 block[0]= backup[0];
3018 block[1]= backup[1];
3019 block[b_stride]= backup[2];
3020 block[b_stride+1]= backup[3];
3025 static void iterative_me(SnowContext *s){
3026 int pass, mb_x, mb_y;
3027 const int b_width = s->b_width << s->block_max_depth;
3028 const int b_height= s->b_height << s->block_max_depth;
3029 const int b_stride= b_width;
3033 RangeCoder r = s->c;
3034 uint8_t state[sizeof(s->block_state)];
3035 memcpy(state, s->block_state, sizeof(s->block_state));
3036 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3037 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3038 encode_q_branch(s, 0, mb_x, mb_y);
3040 memcpy(s->block_state, state, sizeof(s->block_state));
3043 for(pass=0; pass<25; pass++){
3046 for(mb_y= 0; mb_y<b_height; mb_y++){
3047 for(mb_x= 0; mb_x<b_width; mb_x++){
3048 int dia_change, i, j, ref;
3049 int best_rd= INT_MAX, ref_rd;
3050 BlockNode backup, ref_b;
3051 const int index= mb_x + mb_y * b_stride;
3052 BlockNode *block= &s->block[index];
3053 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3054 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3055 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3056 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3057 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3058 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3059 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3060 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3061 const int b_w= (MB_SIZE >> s->block_max_depth);
3062 uint8_t obmc_edged[b_w*2][b_w*2];
3064 if(pass && (block->type & BLOCK_OPT))
3066 block->type |= BLOCK_OPT;
3070 if(!s->me_cache_generation)
3071 memset(s->me_cache, 0, sizeof(s->me_cache));
3072 s->me_cache_generation += 1<<22;
3074 //FIXME precalculate
3077 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3079 for(y=0; y<b_w*2; y++)
3080 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3081 if(mb_x==b_stride-1)
3082 for(y=0; y<b_w*2; y++)
3083 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3085 for(x=0; x<b_w*2; x++)
3086 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3087 for(y=1; y<b_w; y++)
3088 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3090 if(mb_y==b_height-1){
3091 for(x=0; x<b_w*2; x++)
3092 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3093 for(y=b_w; y<b_w*2-1; y++)
3094 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3098 //skip stuff outside the picture
3099 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3100 uint8_t *src= s-> input_picture.data[0];
3101 uint8_t *dst= s->current_picture.data[0];
3102 const int stride= s->current_picture.linesize[0];
3103 const int block_w= MB_SIZE >> s->block_max_depth;
3104 const int sx= block_w*mb_x - block_w/2;
3105 const int sy= block_w*mb_y - block_w/2;
3106 const int w= s->plane[0].width;
3107 const int h= s->plane[0].height;
3111 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3112 for(y=h; y<sy+block_w*2; y++)
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3115 for(y=sy; y<sy+block_w*2; y++)
3116 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3118 if(sx+block_w*2 > w){
3119 for(y=sy; y<sy+block_w*2; y++)
3120 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3124 // intra(black) = neighbors' contribution to the current block
3126 color[i]= get_dc(s, mb_x, mb_y, i);
3128 // get previous score (cannot be cached due to OBMC)
3129 if(pass > 0 && (block->type&BLOCK_INTRA)){
3130 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3131 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3133 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3137 for(ref=0; ref < s->ref_frames; ref++){
3138 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3139 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3144 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3145 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3147 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3149 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3151 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3153 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3156 //FIXME avoid subpel interpolation / round to nearest integer
3159 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3170 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3173 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3175 //FIXME or try the standard 2 pass qpel or similar
3177 mvr[0][0]= block->mx;
3178 mvr[0][1]= block->my;
3179 if(ref_rd > best_rd){
3187 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3188 //FIXME RD style color selection
3190 if(!same_block(block, &backup)){
3191 if(tb ) tb ->type &= ~BLOCK_OPT;
3192 if(lb ) lb ->type &= ~BLOCK_OPT;
3193 if(rb ) rb ->type &= ~BLOCK_OPT;
3194 if(bb ) bb ->type &= ~BLOCK_OPT;
3195 if(tlb) tlb->type &= ~BLOCK_OPT;
3196 if(trb) trb->type &= ~BLOCK_OPT;
3197 if(blb) blb->type &= ~BLOCK_OPT;
3198 if(brb) brb->type &= ~BLOCK_OPT;
3203 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3208 if(s->block_max_depth == 1){
3210 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3211 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3213 int best_rd, init_rd;
3214 const int index= mb_x + mb_y * b_stride;
3217 b[0]= &s->block[index];
3219 b[2]= b[0]+b_stride;
3221 if(same_block(b[0], b[1]) &&
3222 same_block(b[0], b[2]) &&
3223 same_block(b[0], b[3]))
3226 if(!s->me_cache_generation)
3227 memset(s->me_cache, 0, sizeof(s->me_cache));
3228 s->me_cache_generation += 1<<22;
3230 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3232 //FIXME more multiref search?
3233 check_4block_inter(s, mb_x, mb_y,
3234 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3235 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3238 if(!(b[i]->type&BLOCK_INTRA))
3239 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3241 if(init_rd != best_rd)
3245 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantize the coefficients of one subband.
 *
 * Reads DWT coefficients from src and stores the quantized values in dst,
 * both addressed as [x + y*stride]. The quantizer step qmul is looked up in
 * qexp[] from the frame qlog plus the band qlog (clipped to 0..QROOT*16) and
 * scaled up by ENCODER_EXTRA_BITS.
 *
 * @param bias selects the rounding: nonzero means no rounding offset,
 *             zero means an offset of 3/8 of the quantizer step
 *
 * NOTE(review): loop headers, the thres2 assignment and the sign branches
 * are not visible in this listing; comments on them below are hedged.
 */
3249 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3250 const int w= b->width;
3251 const int h= b->height;
3252 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3253 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3254 int x,y, thres1, thres2;
// lossless mode: coefficients are copied through unchanged
3256 if(s->qlog == LOSSLESS_QLOG){
3259 dst[x + y*stride]= src[x + y*stride];
// from here on "bias" holds the rounding offset itself rather than the flag
3263 bias= bias ? 0 : (3*qmul)>>3;
3264 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// variant without rounding offset: plain division by the quantizer step
3270 int i= src[x + y*stride];
// one unsigned compare; presumably a dead-zone test so that small |i|
// fall through to the "store 0" line below -- TODO confirm against thres2
3272 if((unsigned)(i+thres1) > thres2){
3275 i/= qmul; //FIXME optimize
3276 dst[x + y*stride]= i;
// presumably the negative branch: the magnitude is divided, the sign restored
3280 i/= qmul; //FIXME optimize
3281 dst[x + y*stride]= -i;
3284 dst[x + y*stride]= 0;
// variant with rounding offset: bias is added before the division
3290 int i= src[x + y*stride];
3292 if((unsigned)(i+thres1) > thres2){
3295 i= (i + bias) / qmul; //FIXME optimize
3296 dst[x + y*stride]= i;
3300 i= (i + bias) / qmul; //FIXME optimize
3301 dst[x + y*stride]= -i;
3304 dst[x + y*stride]= 0;
/**
 * Dequantize rows start_y..end_y-1 of a subband in place, working on lines
 * fetched from the slice buffer.
 *
 * Each value is scaled by the quantizer step qmul, offset by
 * qadd = (s->qbias*qmul)>>QBIAS_SHIFT and shifted down by QEXPSHIFT.
 * Nothing is done in lossless mode.
 *
 * NOTE(review): src and stride are not used on the lines visible here.
 */
3310 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3311 const int w= b->width;
3312 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3313 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3314 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3317 if(s->qlog == LOSSLESS_QLOG) return;
3319 for(y=start_y; y<end_y; y++){
3320 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// band row y lives in slice-buffer line y*stride_line + buf_y_offset, starting at column buf_x_offset
3321 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// the two forms below scale the magnitude and re-apply the sign; presumably
// selected by the sign of i (the declaration of i and the test are not shown)
3325 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3327 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Dequantize a whole subband in place in a plain coefficient buffer.
 *
 * Uses the same arithmetic as dequantize_slice_buffered(): scale by qmul,
 * add qadd = (s->qbias*qmul)>>QBIAS_SHIFT, shift down by QEXPSHIFT.
 * Nothing is done in lossless mode.
 *
 * @param src    coefficients of the band, addressed as [x + y*stride]
 * @param stride distance between vertically adjacent coefficients
 */
3333 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3334 const int w= b->width;
3335 const int h= b->height;
3336 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3337 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3338 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3341 if(s->qlog == LOSSLESS_QLOG) return;
3345 int i= src[x + y*stride];
// magnitude is scaled and the sign re-applied; the selecting test
// (presumably on the sign of i) is not visible in this listing
3347 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3349 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Replace every coefficient of a subband by its prediction residual,
 * in place.
 *
 * The band is walked from the bottom-right corner to the top-left one, so
 * the left / top neighbours read by the predictors have not been modified
 * yet when they are used. correlate() applies the same predictors with "+=".
 *
 * NOTE(review): the tests that choose between the predictor variants
 * (presumably on x and use_median) are not visible in this listing, and
 * "inverse" is not used on the visible lines.
 */
3355 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3356 const int w= b->width;
3357 const int h= b->height;
3360 for(y=h-1; y>=0; y--){
3361 for(x=w-1; x>=0; x--){
3362 int i= x + y*stride;
// predictor: median of left, top and top-right; left only on the first row or last column
3366 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3367 else src[i] -= src[i - 1];
// predictor: median of left, top and the gradient left + top - top-left
3369 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3370 else src[i] -= src[i - 1];
// predictor: top neighbour only (presumably the x == 0 case)
3373 if(y) src[i] -= src[i - stride];
/**
 * Add the spatial prediction back to rows start_y..end_y-1 of a subband,
 * working on lines fetched from the slice buffer.
 *
 * The predictors match the ones subtracted by decorrelate(), applied with
 * "+=". "prev" refers to the line above the current one; its declaration and
 * update are not visible in this listing.
 *
 * NOTE(review): src, stride and inverse are not used on the visible lines.
 */
3379 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3380 const int w= b->width;
3383 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// fetch the row just above the slice (start_y - 1), presumably only when start_y is nonzero
3387 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3389 for(y=start_y; y<end_y; y++){
3391 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3392 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// predictor: median of left, top and top-right; left only on the first row or last column
3396 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3397 else line[x] += line[x - 1];
// predictor: median of left, top and the gradient left + top - top-left
3399 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3400 else line[x] += line[x - 1];
// predictor: top neighbour only (presumably the x == 0 case)
3403 if(y) line[x] += prev[x];
/**
 * Add the spatial prediction back to every coefficient of a subband held in
 * a plain buffer, in place.
 *
 * Uses the same three predictor forms as decorrelate(), applied with "+=".
 *
 * NOTE(review): the loop headers and the tests selecting the predictor
 * variant are not visible in this listing; "inverse" is not used on the
 * visible lines.
 */
3409 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3410 const int w= b->width;
3411 const int h= b->height;
3416 int i= x + y*stride;
// predictor: median of left, top and top-right; left only on the first row or last column
3420 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3421 else src[i] += src[i - 1];
// predictor: median of left, top and the gradient left + top - top-left
3423 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3424 else src[i] += src[i - 1];
// predictor: top neighbour only (presumably the x == 0 case)
3427 if(y) src[i] += src[i - stride];
/**
 * Write the per-band qlog values to the header.
 *
 * Only planes 0 and 1 are coded, and orientation 2 is skipped; decode_qlogs()
 * fills plane 2 from plane 1 and orientation 2 from orientation 1.
 * Orientation 0 is visited at level 0 only.
 */
3433 static void encode_qlogs(SnowContext *s){
3434 int plane_index, level, orientation;
3436 for(plane_index=0; plane_index<2; plane_index++){
3437 for(level=0; level<s->spatial_decomposition_count; level++){
3438 for(orientation=level ? 1:0; orientation<4; orientation++){
3439 if(orientation==2) continue;
3440 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/**
 * Write the frame header with the range coder.
 *
 * Layout as far as visible here: a keyframe bit coded with its own local
 * state; a block of stream-level fields (version, decomposition settings,
 * colorspace, chroma shifts, reference frame count); a flag plus the motion
 * compensation filter parameters when they differ from the last ones sent;
 * a flag plus the spatial decomposition count when it changed; and finally
 * several fields coded as differences against the last_* values.
 *
 * NOTE(review): some conditions guarding these groups (presumably on
 * s->keyframe and update_mc) are on lines not shown in this listing.
 */
3446 static void encode_header(SnowContext *s){
// the keyframe bit uses a freshly initialised private context
3450 memset(kstate, MID_STATE, sizeof(kstate));
3452 put_rac(&s->c, kstate, s->keyframe);
// keyframe or always_reset: reset the remembered "last" header values
3453 if(s->keyframe || s->always_reset){
3455 s->last_spatial_decomposition_type=
3459 s->last_block_max_depth= 0;
3460 for(plane_index=0; plane_index<2; plane_index++){
3461 Plane *p= &s->plane[plane_index];
3464 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
// stream-level fields, written as absolute values
3468 put_symbol(&s->c, s->header_state, s->version, 0);
3469 put_rac(&s->c, s->header_state, s->always_reset);
3470 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3471 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3472 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3473 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3474 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3475 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3476 put_rac(&s->c, s->header_state, s->spatial_scalability);
3477 // put_rac(&s->c, s->header_state, s->rate_scalability);
// stored minus one; decode_header() increments it again after reading
3478 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// update_mc becomes nonzero when any MC filter parameter of plane 0 or 1 changed
3485 for(plane_index=0; plane_index<2; plane_index++){
3486 Plane *p= &s->plane[plane_index];
3487 update_mc |= p->last_htaps != p->htaps;
3488 update_mc |= p->last_diag_mc != p->diag_mc;
3489 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3491 put_rac(&s->c, s->header_state, update_mc);
3493 for(plane_index=0; plane_index<2; plane_index++){
3494 Plane *p= &s->plane[plane_index];
3495 put_rac(&s->c, s->header_state, p->diag_mc);
3496 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
// only magnitudes of hcoeff[htaps/2..1] are sent; decode_header() restores
// the alternating signs and derives hcoeff[0] as 32 minus their sum
3497 for(i= p->htaps/2; i; i--)
3498 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
// the decomposition count is sent only when it changed, behind a one-bit flag
3501 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3502 put_rac(&s->c, s->header_state, 1);
3503 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3506 put_rac(&s->c, s->header_state, 0);
// remaining fields are coded as differences against the previous header
3509 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3510 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3511 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3512 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3513 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
/**
 * Remember the header values of the current frame in the last_* fields, which
 * encode_header() uses as the reference for its change flags and
 * difference-coded fields.
 *
 * Only planes 0 and 1 are updated on the lines visible here.
 */
3517 static void update_last_header_values(SnowContext *s){
3521 for(plane_index=0; plane_index<2; plane_index++){
3522 Plane *p= &s->plane[plane_index];
3523 p->last_diag_mc= p->diag_mc;
3524 p->last_htaps = p->htaps;
3525 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3529 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3530 s->last_qlog = s->qlog;
3531 s->last_qbias = s->qbias;
3532 s->last_mv_scale = s->mv_scale;
3533 s->last_block_max_depth = s->block_max_depth;
3534 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
/**
 * Read the per-band qlog values from the header and fill in all three planes.
 *
 * Only planes 0 and 1, orientations other than 2, are actually read from the
 * bitstream (matching encode_qlogs()); the remaining entries are copied.
 */
3537 static void decode_qlogs(SnowContext *s){
3538 int plane_index, level, orientation;
3540 for(plane_index=0; plane_index<3; plane_index++){
3541 for(level=0; level<s->spatial_decomposition_count; level++){
3542 for(orientation=level ? 1:0; orientation<4; orientation++){
// plane 2 reuses the values of plane 1
3544 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
// orientation 2 reuses the value of orientation 1 of the same level
3545 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3546 else q= get_symbol(&s->c, s->header_state, 1);
3547 s->plane[plane_index].band[level][orientation].qlog= q;
/**
 * Helper for decode_header(): read one unsigned symbol into the local "tmp"
 * and log an error naming the target field (stringified dst) and the value.
 *
 * Expects "s" and "tmp" to be in scope at the point of use.
 * NOTE(review): the lines that evaluate "check" and store tmp into dst are
 * not visible in this listing; presumably the error path is taken when
 * "check" is false -- confirm against the full macro.
 */
3553 #define GET_S(dst, check) \
3554 tmp= get_symbol(&s->c, s->header_state, 0);\
3556 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
/**
 * Parse the frame header written by encode_header().
 *
 * Reads the keyframe bit with a private context, then the stream-level
 * fields, the optional motion compensation filter parameters, the optional
 * spatial decomposition count and finally the difference-coded fields, which
 * are accumulated onto the values kept from the previous header.
 * Fields read through GET_S are range-checked; further sanity checks on the
 * decomposition type, decomposition count and block_max_depth log an error.
 * All error messages are newline-terminated so that following log output
 * starts on its own line.
 *
 * NOTE(review): the return statements and some guarding conditions are on
 * lines not shown in this listing.
 */
3561 static int decode_header(SnowContext *s){
3562 int plane_index, tmp;
// the keyframe bit uses a freshly initialised private context
3565 memset(kstate, MID_STATE, sizeof(kstate));
3567 s->keyframe= get_rac(&s->c, kstate);
// keyframe or always_reset: start the accumulated fields from zero again
3568 if(s->keyframe || s->always_reset){
3570 s->spatial_decomposition_type=
3574 s->block_max_depth= 0;
// tmp is converted to unsigned for this compare, so it only holds for tmp == 0
3577 GET_S(s->version, tmp <= 0U)
3578 s->always_reset= get_rac(&s->c, s->header_state);
3579 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3580 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3581 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3582 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3583 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3584 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3585 s->spatial_scalability= get_rac(&s->c, s->header_state);
3586 // s->rate_scalability= get_rac(&s->c, s->header_state);
// the stream stores max_ref_frames-1; the unsigned compare also rejects negative tmp
3587 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3588 s->max_ref_frames++;
// one-bit flag: new motion compensation filter parameters follow
3594 if(get_rac(&s->c, s->header_state)){
3595 for(plane_index=0; plane_index<2; plane_index++){
3596 int htaps, i, sum=0;
3597 Plane *p= &s->plane[plane_index];
3598 p->diag_mc= get_rac(&s->c, s->header_state);
3599 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3600 if((unsigned)htaps > HTAPS_MAX || htaps==0)
// magnitudes are read for i = htaps/2 .. 1; odd i get a negative sign
3603 for(i= htaps/2; i; i--){
3604 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3605 sum += p->hcoeff[i];
// hcoeff[0] is not transmitted, it is derived from the others
3607 p->hcoeff[0]= 32-sum;
// plane 2 shares the filter parameters of plane 1
3609 s->plane[2].diag_mc= s->plane[1].diag_mc;
3610 s->plane[2].htaps = s->plane[1].htaps;
3611 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// one-bit flag: a new spatial decomposition count follows
3613 if(get_rac(&s->c, s->header_state)){
3614 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3619 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3620 if(s->spatial_decomposition_type > 1U){
3621 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type);
// the smaller chroma dimension must still be nonzero after count-1 halvings
3624 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3625 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3626 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size\n", s->spatial_decomposition_count);
// difference-coded fields: add the signed delta to the previous value
3630 s->qlog += get_symbol(&s->c, s->header_state, 1);
3631 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3632 s->qbias += get_symbol(&s->c, s->header_state, 1);
3633 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// only depths 0 and 1 are accepted; anything else is reported and reset to 0
3634 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3635 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
3636 s->block_max_depth= 0;
/**
 * Build a geometric progression with ratio 2^(1/QROOT) over QROOT steps.
 *
 * NOTE(review): the initial value of v and the statement that stores it
 * (presumably into the qexp[] table used by the quantizers) are on lines not
 * shown in this listing.
 */
3643 static void init_qexp(void){
3647 for(i=0; i<QROOT; i++){
3649 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation shared by encoder and decoder.
 *
 * Sets up the DSP function tables, allocates the two full-frame coefficient
 * buffers, fills the scale_mv_ref table and obtains the mconly_picture
 * buffer and the scratch buffer.
 *
 * NOTE(review): on the lines visible here the results of av_mallocz(),
 * get_buffer() and av_malloc() are not checked -- confirm whether the caller
 * or the omitted lines handle allocation failure.
 */
3653 static av_cold int common_init(AVCodecContext *avctx){
3654 SnowContext *s = avctx->priv_data;
3659 s->max_ref_frames=1; //just make sure it is not an invalid value in case of no initial keyframe
3661 dsputil_init(&s->dsp, avctx);
// continuation lines of a helper macro (its #define line is not shown):
// point both qpel tables, for both block sizes, at the H.264 qpel functions
3664 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3665 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3666 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3667 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3668 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3669 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// install the mc_block_hpel<dx><dy>16 / ...8 functions into the put_pixels tables
3688 #define mcfh(dx,dy)\
3689 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3690 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3691 mc_block_hpel ## dx ## dy ## 16;\
3692 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3693 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3694 mc_block_hpel ## dx ## dy ## 8;
3704 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3706 width= s->avctx->width;
3707 height= s->avctx->height;
// one zeroed coefficient buffer of width*height elements for each element type
3709 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3710 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] is the ratio (i+1)/(j+1) in 8-bit fixed point
3712 for(i=0; i<MAX_REF_FRAMES; i++)
3713 for(j=0; j<MAX_REF_FRAMES; j++)
3714 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
// the scratch buffer size depends on the line size of the buffer just obtained
3716 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3717 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
/**
 * Derive the plane and subband geometry once the header values
 * (chroma shifts, spatial_decomposition_count) are known.
 *
 * For each of the three planes the width and height are stored, and for each
 * band its buffer pointers, stride, size, slice-buffer offsets and parent
 * are set up; the per-band x_coeff array is freed and reallocated.
 *
 * NOTE(review): the conditions around the chroma shift, the offset
 * adjustments and the parent assignment, as well as the updates of w and h
 * between levels, are on lines not shown in this listing. The result of
 * av_mallocz() is not checked on the visible lines.
 */
3722 static int common_init_after_header(AVCodecContext *avctx){
3723 SnowContext *s = avctx->priv_data;
3724 int plane_index, level, orientation;
3726 for(plane_index=0; plane_index<3; plane_index++){
3727 int w= s->avctx->width;
3728 int h= s->avctx->height;
// presumably applied to the chroma planes only
3731 w>>= s->chroma_h_shift;
3732 h>>= s->chroma_v_shift;
3734 s->plane[plane_index].width = w;
3735 s->plane[plane_index].height= h;
// levels are visited from the highest index down to 0
3737 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3738 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3739 SubBand *b= &s->plane[plane_index].band[level][orientation];
3741 b->buf= s->spatial_dwt_buffer;
3743 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// halve w and h; width rounds up for even orientations, height for orientations 0 and 1
3744 b->width = (w + !(orientation&1))>>1;
3745 b->height= (h + !(orientation>1))>>1;
3747 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3748 b->buf_x_offset = 0;
3749 b->buf_y_offset = 0;
3753 b->buf_x_offset = (w+1)>>1;
3756 b->buf += b->stride>>1;
3757 b->buf_y_offset = b->stride_line >> 1;
// ibuf sits at the same element offset in the IDWT buffer as buf does in the DWT buffer
3759 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3762 b->parent= &s->plane[plane_index].band[level-1][orientation];
3763 //FIXME avoid this realloc
3764 av_freep(&b->x_coeff);
3765 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
/**
 * Convert a quality value given in lambda units to the codec's qlog scale:
 * QROOT times the base-2 logarithm of qscale/FF_QP2LAMBDA, rounded to the
 * nearest integer, plus a fixed offset of 61*QROOT/8.
 */
3775 static int qscale2qlog(int qscale){
3776 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3777 + 61*QROOT/8; //<64 >60
/**
 * Single-pass rate control: estimate the complexity of the luma plane, ask
 * the rate controller for a quality and adapt s->lambda and s->qlog to it.
 *
 * pict->quality is overwritten with the rate controller's result.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably delta_qlog is returned on success and an error value when the
 * estimated quality is negative.
 */
3780 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3782 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3783 * FIXME we know exact mv bits at this point,
3784 * but ratecontrol isn't set up to include them. */
3785 uint32_t coef_sum= 0;
3786 int level, orientation, delta_qlog;
3788 for(level=0; level<s->spatial_decomposition_count; level++){
3789 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3790 SubBand *b= &s->plane[0].band[level][orientation];
3791 IDWTELEM *buf= b->ibuf;
3792 const int w= b->width;
3793 const int h= b->height;
3794 const int stride= b->stride;
// the band weight uses a fixed base qlog of 2*QROOT instead of the frame qlog
3795 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3796 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// 16-bit fixed-point reciprocal of qmul, so the division becomes a multiply and shift
3797 const int qdiv= (1<<16)/qmul;
3799 //FIXME this is ugly
// copy the band into the IDWT buffer and decorrelate that copy
3802 buf[x+y*stride]= b->buf[x+y*stride];
3804 decorrelate(s, b, buf, stride, 1, 0);
3807 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3811 /* ugly, ratecontrol just takes a sqrt again */
3812 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3813 assert(coef_sum < INT_MAX);
// the estimate is reported as mb_var_sum for I frames and as mc_mb_var_sum otherwise
3815 if(pict->pict_type == FF_I_TYPE){
3816 s->m.current_picture.mb_var_sum= coef_sum;
3817 s->m.current_picture.mc_mb_var_sum= 0;
3819 s->m.current_picture.mc_mb_var_sum= coef_sum;
3820 s->m.current_picture.mb_var_sum= 0;
3823 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3824 if (pict->quality < 0)
3826 s->lambda= pict->quality * 3/2;
3827 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3828 s->qlog+= delta_qlog;
/**
 * Derive the qlog of every subband of a plane from the response of the
 * inverse transform to a single coefficient in that band.
 *
 * For each band the IDWT buffer is cleared, one coefficient near the band
 * centre is set to 256*16, the inverse DWT is run and the resulting samples
 * are folded into "error", from which the band qlog is computed.
 * s->spatial_idwt_buffer is overwritten in the process.
 *
 * NOTE(review): the declaration and accumulation of "error" are on lines not
 * shown in this listing (presumably a sum of squares of d).
 */
3832 static void calculate_visual_weight(SnowContext *s, Plane *p){
3833 int width = p->width;
3834 int height= p->height;
3835 int level, orientation, x, y;
3837 for(level=0; level<s->spatial_decomposition_count; level++){
3838 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3839 SubBand *b= &p->band[level][orientation];
3840 IDWTELEM *ibuf= b->ibuf;
// unit impulse in the middle of the band, everything else zero
3843 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3844 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3845 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3846 for(y=0; y<height; y++){
3847 for(x=0; x<width; x++){
3848 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// logarithm to base 2^(1/QROOT) of 352256/sqrt(error), rounded by adding 0.5
3853 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
/**
 * Accumulate the squared difference between two pictures into a coarse
 * score grid.
 *
 * Every sample position (x,y) of the plane is mapped to a grid cell by
 * dividing its offset position by the band's sampling step and by Q2_STEP;
 * the squared difference r0 - r1 at that position is added to the cell.
 * The score array is cleared first.
 *
 * @param score_stride number of cells per row of the score grid
 * @param r0, r1       pictures to compare, addressed as [x + y*p->width]
 *
 * NOTE(review): the definitions of xo and yo are on lines not shown in this
 * listing; b is not used on the visible lines.
 */
3863 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3864 SubBand *b= &p->band[level][orientation];
// distance in picture samples between neighbouring coefficients of this level
3868 int step= 1 << (s->spatial_decomposition_count - level);
3875 //FIXME bias for nonzero ?
3877 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3878 for(y=0; y<p->height; y++){
3879 for(x=0; x<p->width; x++){
3880 int sx= (x-xo + step/2) / step / Q2_STEP;
3881 int sy= (y-yo + step/2) / step / Q2_STEP;
3882 int v= r0[x + y*p->width] - r1[x + y*p->width];
3883 assert(sx>=0 && sy>=0 && sx < score_stride);
3885 score[sx + sy*score_stride] += v*v;
// a negative sum would indicate that the int accumulator overflowed
3886 assert(score[sx + sy*score_stride] >= 0);
/**
 * Dequantize every subband of a plane inside an arbitrary coefficient buffer.
 *
 * @param buffer buffer laid out like s->spatial_idwt_buffer; each band is
 *               located at the same element offset as its ibuf pointer
 *
 * width and height are not used on the lines visible here.
 */
3891 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3892 int level, orientation;
3894 for(level=0; level<s->spatial_decomposition_count; level++){
3895 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3896 SubBand *b= &p->band[level][orientation];
3897 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3899 dequantize(s, b, dst, b->stride);
/**
 * Forward transform and quantize a plane, then try to improve the result by
 * moving groups of coefficients one step towards zero.
 *
 * After the DWT and a plain quantize() of every band into best_dequant, each
 * band is visited once per (xs,ys) phase of a Q2_STEP grid: the current best
 * is reconstructed and scored with find_sse(), a candidate with the selected
 * coefficients reduced by one in magnitude is reconstructed and scored, and
 * the reduction is kept per grid cell when the candidate score does not
 * exceed the best score by more than "threshold". The final coefficients
 * are copied to s->spatial_idwt_buffer.
 *
 * NOTE(review): best_dequant and idwt2_buffer are variable length arrays of
 * height*stride elements on the stack -- confirm this is acceptable for the
 * frame sizes in use. Some statements of the search loop are on lines not
 * shown in this listing.
 */
3904 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3905 int level, orientation, ys, xs, x, y, pass;
3906 IDWTELEM best_dequant[height * stride];
3907 IDWTELEM idwt2_buffer[height * stride];
3908 const int score_stride= (width + 10)/Q2_STEP;
3909 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3910 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
// tolerated score increase when a candidate is accepted
3911 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3913 //FIXME pass the copy cleanly ?
3915 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3916 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// initial solution: straightforward quantization of every band
3918 for(level=0; level<s->spatial_decomposition_count; level++){
3919 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3920 SubBand *b= &p->band[level][orientation];
3921 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3922 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3923 assert(src == b->buf); // code does not depend on this but it is true currently
3925 quantize(s, b, dst, src, b->stride, s->qbias);
3928 for(pass=0; pass<1; pass++){
3929 if(s->qbias == 0) //keyframe
3931 for(level=0; level<s->spatial_decomposition_count; level++){
3932 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3933 SubBand *b= &p->band[level][orientation];
3934 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3935 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3937 for(ys= 0; ys<Q2_STEP; ys++){
3938 for(xs= 0; xs<Q2_STEP; xs++){
// score of the current best solution
3939 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3940 dequantize_all(s, p, idwt2_buffer, width, height);
3941 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3942 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// candidate: coefficients at phase (xs,ys) moved one step towards zero
3943 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3944 for(y=ys; y<b->height; y+= Q2_STEP){
3945 for(x=xs; x<b->width; x+= Q2_STEP){
3946 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3947 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3948 //FIXME try more than just --
3951 dequantize_all(s, p, idwt2_buffer, width, height);
3952 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3953 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// keep the reduction wherever the candidate is not worse by more than threshold
3954 for(y=ys; y<b->height; y+= Q2_STEP){
3955 for(x=xs; x<b->width; x+= Q2_STEP){
3956 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3957 if(score[score_idx] <= best_score[score_idx] + threshold){
3958 best_score[score_idx]= score[score_idx];
3959 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3960 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3961 //FIXME copy instead
3970 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3973 #endif /* QUANTIZE2==1 */
/**
 * Encoder initialisation.
 *
 * Refuses to run unless experimental compliance is allowed, and rejects the
 * 9/7 wavelet together with lossless mode. Then it takes the wavelet type,
 * mv_scale and block_max_depth from the codec flags, installs the default
 * motion compensation filter for all three planes, allocates the motion
 * estimation work buffers, clamps max_ref_frames to 1..MAX_REF_FRAMES, sets
 * up rate control when needed, checks the pixel format (only
 * PIX_FMT_YUV420P is enabled), installs the comparison functions, obtains
 * the input picture buffer and, for ME_ITER, allocates the per-reference
 * motion vector and score arrays.
 *
 * NOTE(review): none of the av_mallocz() results and neither get_buffer()
 * call is checked on the lines visible here; return statements are on lines
 * not shown in this listing.
 */
3975 static av_cold int encode_init(AVCodecContext *avctx)
3977 SnowContext *s = avctx->priv_data;
3980 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3981 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3982 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
// fixed-quantizer mode with global_quality 0 is the lossless setting
3986 if(avctx->prediction_method == DWT_97
3987 && (avctx->flags & CODEC_FLAG_QSCALE)
3988 && avctx->global_quality == 0){
3989 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3993 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3995 s->chroma_h_shift= 1; //FIXME XXX
3996 s->chroma_v_shift= 1;
3998 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3999 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// default motion compensation filter, identical for all planes
4001 for(plane_index=0; plane_index<3; plane_index++){
4002 s->plane[plane_index].diag_mc= 1;
4003 s->plane[plane_index].htaps= 6;
4004 s->plane[plane_index].hcoeff[0]= 40;
4005 s->plane[plane_index].hcoeff[1]= -10;
4006 s->plane[plane_index].hcoeff[2]= 2;
4007 s->plane[plane_index].fast_mc= 1;
4016 s->m.flags = avctx->flags;
4017 s->m.bit_rate= avctx->bit_rate;
// work buffers for the motion estimation code operating on s->m
4020 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4021 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4022 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4023 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4024 h263_encode_init(&s->m); //mv_penalty
4026 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4028 if(avctx->flags&CODEC_FLAG_PASS1){
4029 if(!avctx->stats_out)
4030 avctx->stats_out = av_mallocz(256);
// rate control is needed for the second pass and whenever no fixed quantizer is used
4032 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4033 if(ff_rate_control_init(&s->m) < 0)
// single-pass rate control: neither fixed quantizer nor second pass
4036 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4038 avctx->coded_frame= &s->current_picture;
4039 switch(avctx->pix_fmt){
4040 // case PIX_FMT_YUV444P:
4041 // case PIX_FMT_YUV422P:
4042 case PIX_FMT_YUV420P:
4044 // case PIX_FMT_YUV411P:
4045 // case PIX_FMT_YUV410P:
4046 s->colorspace_type= 0;
4048 /* case PIX_FMT_RGB32:
4052 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4055 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4056 s->chroma_h_shift= 1;
4057 s->chroma_v_shift= 1;
4059 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4060 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4062 s->avctx->get_buffer(s->avctx, &s->input_picture);
4064 if(s->avctx->me_method == ME_ITER){
4066 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4067 for(i=0; i<s->max_ref_frames; i++){
4068 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4069 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4076 #define USE_HALFPEL_PLANE 0
/**
 * Build half-pel interpolated copies of a frame.
 *
 * For every plane p three buffers halfpel[1..3][p] are allocated and filled
 * with a 6-tap filter (taps 1, -5, 20, 20, -5, 1, rounded and divided by 32):
 * [1] filters horizontally, [2] and [3] vertically. The stored pointers are
 * advanced by EDGE_WIDTH*(1+ls) into each allocation; release_buffer()
 * subtracts the same offset before freeing.
 *
 * NOTE(review): the loops, the source used for halfpel[3] and the handling
 * of halfpel[0] are on lines not shown in this listing. The av_malloc()
 * results are not checked before the offset is added.
 */
4078 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
// the filters read outside the picture area, which requires padded frame buffers
4081 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4085 int w= s->avctx->width >>is_chroma;
4086 int h= s->avctx->height >>is_chroma;
4087 int ls= frame->linesize[p];
4088 uint8_t *src= frame->data[p];
4090 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4091 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4092 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// horizontal half-pel position
4099 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// vertical half-pel position
4106 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// same vertical filter; presumably src points at the horizontally filtered plane here -- TODO confirm
4114 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
/**
 * Release the oldest reference picture (index max_ref_frames-1) together
 * with its half-pel planes, if it holds data.
 *
 * NOTE(review): the loop over i is on a line not shown in this listing.
 */
4122 static void release_buffer(AVCodecContext *avctx){
4123 SnowContext *s = avctx->priv_data;
4126 if(s->last_picture[s->max_ref_frames-1].data[0]){
4127 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// i selects half-pel plane [1+i/3] of picture plane [i%3]
4129 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
// undo the EDGE_WIDTH*(1+linesize) offset added in halfpel_interpol() to get the allocated pointer back
4130 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/**
 * Prepare the picture buffers for a new frame.
 *
 * Pads the edges of the picture that was current so far, releases the oldest
 * reference, shifts the reference list by one so that the previous current
 * picture becomes last_picture[0], checks that reference frames are
 * available and obtains a new buffer for the current picture.
 *
 * NOTE(review): the statements that count s->ref_frames, the condition
 * around the "No reference frames" check and the return statements are on
 * lines not shown in this listing.
 */
4134 static int frame_start(SnowContext *s){
4136 int w= s->avctx->width; //FIXME round up to x16 ?
4137 int h= s->avctx->height;
// extend the borders; the two chroma planes use half the size and half the edge width
4139 if(s->current_picture.data[0]){
4140 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4141 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4142 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4145 release_buffer(s->avctx);
// rotate: the freed oldest slot becomes the new current picture, the old
// current picture moves to the front of the reference list
4147 tmp= s->last_picture[s->max_ref_frames-1];
4148 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4149 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4150 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4151 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4152 s->last_picture[0]= s->current_picture;
4153 s->current_picture= tmp;
// walk the references that hold data; presumably the walk stops after a key frame
4159 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4160 if(i && s->last_picture[i-1].key_frame)
4163 if(s->ref_frames==0){
4164 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
4169 s->current_picture.reference= 1;
4170 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4171 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4175 s->current_picture.key_frame= s->keyframe;
4180 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4181 SnowContext *s = avctx->priv_data;
4182 RangeCoder * const c= &s->c;
4183 AVFrame *pict = data;
4184 const int width= s->avctx->width;
4185 const int height= s->avctx->height;
4186 int level, orientation, plane_index, i, y;
4187 uint8_t rc_header_bak[sizeof(s->header_state)];
4188 uint8_t rc_block_bak[sizeof(s->block_state)];
4190 ff_init_range_encoder(c, buf, buf_size);
4191 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4195 for(y=0; y<(height>>shift); y++)
4196 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4197 &pict->data[i][y * pict->linesize[i]],
4200 s->new_picture = *pict;
4202 s->m.picture_number= avctx->frame_number;
4203 if(avctx->flags&CODEC_FLAG_PASS2){
4205 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4206 s->keyframe= pict->pict_type==FF_I_TYPE;
4207 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4208 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4209 if (pict->quality < 0)
4213 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4215 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4218 if(s->pass1_rc && avctx->frame_number == 0)
4219 pict->quality= 2*FF_QP2LAMBDA;
4221 s->qlog= qscale2qlog(pict->quality);
4222 s->lambda = pict->quality * 3/2;
4224 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4225 s->qlog= LOSSLESS_QLOG;
4227 }//else keep previous frame's qlog until after motion estimation
4231 s->m.current_picture_ptr= &s->m.current_picture;
4232 if(pict->pict_type == FF_P_TYPE){
4233 int block_width = (width +15)>>4;
4234 int block_height= (height+15)>>4;
4235 int stride= s->current_picture.linesize[0];
4237 assert(s->current_picture.data[0]);
4238 assert(s->last_picture[0].data[0]);
4240 s->m.avctx= s->avctx;
4241 s->m.current_picture.data[0]= s->current_picture.data[0];
4242 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4243 s->m. new_picture.data[0]= s-> input_picture.data[0];
4244 s->m. last_picture_ptr= &s->m. last_picture;
4246 s->m. last_picture.linesize[0]=
4247 s->m. new_picture.linesize[0]=
4248 s->m.current_picture.linesize[0]= stride;
4249 s->m.uvlinesize= s->current_picture.linesize[1];
4251 s->m.height= height;
4252 s->m.mb_width = block_width;
4253 s->m.mb_height= block_height;
4254 s->m.mb_stride= s->m.mb_width+1;
4255 s->m.b8_stride= 2*s->m.mb_width+1;
4257 s->m.pict_type= pict->pict_type;
4258 s->m.me_method= s->avctx->me_method;
4259 s->m.me.scene_change_score=0;
4260 s->m.flags= s->avctx->flags;
4261 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4262 s->m.out_format= FMT_H263;
4263 s->m.unrestricted_mv= 1;
4265 s->m.lambda = s->lambda;
4266 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4267 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4269 s->m.dsp= s->dsp; //move
4275 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4276 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4281 if(pict->pict_type == FF_I_TYPE)
4282 s->spatial_decomposition_count= 5;
4284 s->spatial_decomposition_count= 5;
4286 s->m.pict_type = pict->pict_type;
4287 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4289 common_init_after_header(avctx);
4291 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4292 for(plane_index=0; plane_index<3; plane_index++){
4293 calculate_visual_weight(s, &s->plane[plane_index]);
4298 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4299 encode_blocks(s, 1);
4300 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4302 for(plane_index=0; plane_index<3; plane_index++){
4303 Plane *p= &s->plane[plane_index];
4307 // int bits= put_bits_count(&s->c.pb);
4309 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4311 if(pict->data[plane_index]) //FIXME gray hack
4314 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4317 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4320 && pict->pict_type == FF_P_TYPE
4321 && !(avctx->flags&CODEC_FLAG_PASS2)
4322 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4323 ff_init_range_encoder(c, buf, buf_size);
4324 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4325 pict->pict_type= FF_I_TYPE;
4327 s->current_picture.key_frame=1;
4331 if(s->qlog == LOSSLESS_QLOG){
4334 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4340 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4346 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4348 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4350 if(s->pass1_rc && plane_index==0){
4351 int delta_qlog = ratecontrol_1pass(s, pict);
4352 if (delta_qlog <= INT_MIN)
4355 //reordering qlog in the bitstream would eliminate this reset
4356 ff_init_range_encoder(c, buf, buf_size);
4357 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4358 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4360 encode_blocks(s, 0);
4364 for(level=0; level<s->spatial_decomposition_count; level++){
4365 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4366 SubBand *b= &p->band[level][orientation];
4369 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4371 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4372 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4373 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4375 correlate(s, b, b->ibuf, b->stride, 1, 0);
4379 for(level=0; level<s->spatial_decomposition_count; level++){
4380 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4381 SubBand *b= &p->band[level][orientation];
4383 dequantize(s, b, b->ibuf, b->stride);
4387 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4388 if(s->qlog == LOSSLESS_QLOG){
4391 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4395 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4398 if(pict->pict_type == FF_I_TYPE){
4401 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4402 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4406 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4407 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4410 if(s->avctx->flags&CODEC_FLAG_PSNR){
4413 if(pict->data[plane_index]) //FIXME gray hack
4416 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4420 s->avctx->error[plane_index] += error;
4421 s->current_picture.error[plane_index] = error;
4426 update_last_header_values(s);
4428 release_buffer(avctx);
4430 s->current_picture.coded_picture_number = avctx->frame_number;
4431 s->current_picture.pict_type = pict->pict_type;
4432 s->current_picture.quality = pict->quality;
4433 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4434 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4435 s->m.current_picture.display_picture_number =
4436 s->m.current_picture.coded_picture_number = avctx->frame_number;
4437 s->m.current_picture.quality = pict->quality;
4438 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4440 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4442 if(avctx->flags&CODEC_FLAG_PASS1)
4443 ff_write_pass1_stats(&s->m);
4444 s->m.last_pict_type = s->m.pict_type;
4445 avctx->frame_bits = s->m.frame_bits;
4446 avctx->mv_bits = s->m.mv_bits;
4447 avctx->misc_bits = s->m.misc_bits;
4448 avctx->p_tex_bits = s->m.p_tex_bits;
4452 return ff_rac_terminate(c);
/**
 * Release the buffers held by a SnowContext.
 *
 * Visible in this listing: the spatial DWT/IDWT buffers, the s->m.me
 * scratchpad/map/score_map and s->m.obmc_scratchpad, s->block, s->scratchbuf,
 * the per-reference ref_mvs/ref_scores tables, any reference picture that
 * still carries data, and the x_coeff table of every subband.
 *
 * NOTE(review): the embedded line numbers skip (e.g. 4460-4461, 4475-4476,
 * 4483 onwards), so statements and closing braces of the original function
 * are not part of this listing.
 *
 * @param s context whose buffers are released
 */
4455 static av_cold void common_end(SnowContext *s){
4456 int plane_index, level, orientation, i;
4458 av_freep(&s->spatial_dwt_buffer);
4459 av_freep(&s->spatial_idwt_buffer);
4462 av_freep(&s->m.me.scratchpad);
4463 av_freep(&s->m.me.map);
4464 av_freep(&s->m.me.score_map);
4465 av_freep(&s->m.obmc_scratchpad);
4467 av_freep(&s->block);
4468 av_freep(&s->scratchbuf);
// Per reference slot: drop the motion-vector and score tables, and hand the
// picture back through the codec context only if it still holds plane 0 data.
4470 for(i=0; i<MAX_REF_FRAMES; i++){
4471 av_freep(&s->ref_mvs[i]);
4472 av_freep(&s->ref_scores[i]);
4473 if(s->last_picture[i].data[0])
4474 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// Walk all three planes, levels from spatial_decomposition_count-1 down to 0.
// Level 0 covers orientations 0..3, every other level covers 1..3.
4477 for(plane_index=0; plane_index<3; plane_index++){
4478 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4479 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4480 SubBand *b= &s->plane[plane_index].band[level][orientation];
4482 av_freep(&b->x_coeff);
/**
 * Encoder teardown.
 *
 * Visible in this listing: the SnowContext is fetched from avctx->priv_data
 * and avctx->stats_out is freed with av_free().
 *
 * NOTE(review): lines 4489-4492 and everything after 4493 are not part of
 * this listing, so any further cleanup and the return value cannot be
 * confirmed from here.
 *
 * @param avctx codec context being closed
 */
4488 static av_cold int encode_end(AVCodecContext *avctx)
4490 SnowContext *s = avctx->priv_data;
// av_free() leaves avctx->stats_out pointing at the freed block.
// NOTE(review): common_end() above uses av_freep() for its buffers - confirm
// whether the same is wanted here.
4493 av_free(avctx->stats_out);
/**
 * Decoder initialisation.
 *
 * Visible in this listing: the output pixel format is fixed to
 * PIX_FMT_YUV420P.
 *
 * NOTE(review): lines 4499 and 4501-4506 are not part of this listing, so
 * the remaining setup and the return value cannot be confirmed from here.
 *
 * @param avctx codec context being initialised
 */
4498 static av_cold int decode_init(AVCodecContext *avctx)
4500 avctx->pix_fmt= PIX_FMT_YUV420P;
/**
 * Decode one packet into a picture.
 *
 * Steps visible in this listing:
 *  - start the range decoder on the packet bytes and parse the header,
 *  - rebuild the slice buffer and recompute each plane's fast_mc flag,
 *  - start the frame, then for each of the three planes unpack the subband
 *    coefficients and run the slice-based inverse DWT and prediction,
 *  - hand the resulting picture to the caller.
 *
 * NOTE(review): the embedded line numbers skip throughout (error returns,
 * several declarations, loop headers, closing braces and the final return
 * are not part of this listing). Comments below describe only what is shown.
 *
 * @param avctx     codec context; priv_data is used as the SnowContext
 * @param data      used as the AVFrame that receives the decoded picture
 * @param data_size set to sizeof(AVFrame)
 * @param avpkt     input packet; its data and size fields are read
 */
4507 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
4508 const uint8_t *buf = avpkt->data;
4509 int buf_size = avpkt->size;
4510 SnowContext *s = avctx->priv_data;
4511 RangeCoder * const c= &s->c;
4513 AVFrame *picture = data;
4514 int level, orientation, plane_index;
// Range decoder set up with the same ff_build_rac_states() arguments the
// encoder side uses earlier in this file.
4516 ff_init_range_decoder(c, buf, buf_size);
4517 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4519 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// The statement executed on header failure (line 4521) is not in this listing.
4520 if(decode_header(s)<0)
4522 common_init_after_header(avctx);
4524 // realloc slice buffer for the case that spatial_decomposition_count changed
4525 slice_buffer_destroy(&s->sb);
4526 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
// fast_mc requires diag_mc, htaps==6 and hcoeff[0..1]=={40,-10}; any further
// terms of this condition (lines 4532-4536) are not in this listing.
4528 for(plane_index=0; plane_index<3; plane_index++){
4529 Plane *p= &s->plane[plane_index];
4530 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4531 && p->hcoeff[1]==-10
// The statement executed when frame_start() fails is not in this listing.
4537 if(frame_start(s) < 0)
4539 //keyframe flag duplication mess FIXME
// NOTE(review): this is debug output gated by FF_DEBUG_PICT_INFO but it is
// emitted at AV_LOG_ERROR level - confirm whether a lower level is intended.
4540 if(avctx->debug&FF_DEBUG_PICT_INFO)
4541 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4545 for(plane_index=0; plane_index<3; plane_index++){
4546 Plane *p= &s->plane[plane_index];
4550 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug path (debug bit 2048): run predict_plane() and copy the current
// picture into mconly_picture.
// NOTE(review): the memset clears spatial_dwt_buffer (sized with DWTELEM),
// while predict_plane() on the next line is given spatial_idwt_buffer; the
// encoder path earlier in this file clears spatial_idwt_buffer with IDWTELEM
// before the same call - confirm which buffer is meant here.
4552 if(s->avctx->debug&2048){
4553 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4554 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4558 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4559 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Unpack the coefficients of every subband of this plane; level 0 includes
// orientation 0, higher levels start at orientation 1.
4565 for(level=0; level<s->spatial_decomposition_count; level++){
4566 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4567 SubBand *b= &p->band[level][orientation];
4568 unpack_coeffs(s, b, b->parent, orientation);
// Slice-based reconstruction. Blocks are block_size rows tall for plane 0 and
// half that for the other planes.
4574 const int mb_h= s->b_height << s->block_max_depth;
4575 const int block_size = MB_SIZE >> s->block_max_depth;
4576 const int block_w = plane_index ? block_size/2 : block_size;
4578 DWTCompose cs[MAX_DECOMPOSITIONS];
4583 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// The bound is inclusive: the loop body runs mb_h+1 times.
4584 for(mb_y=0; mb_y<=mb_h; mb_y++){
4586 int slice_starty = block_w*mb_y;
4587 int slice_h = block_w*(mb_y+1);
// Unless this is a keyframe (or debug bit 512 is set), the slice window is
// moved up by half a block, with its start clamped at row 0.
4588 if (!(s->keyframe || s->avctx->debug&512)){
4589 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4590 slice_h -= (block_w >> 1);
4593 for(level=0; level<s->spatial_decomposition_count; level++){
4594 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4595 SubBand *b= &p->band[level][orientation];
4598 int our_mb_start = mb_y;
4599 int our_mb_end = (mb_y + 1);
// Row range of this subband for the current block row: the block rows scaled
// down by (spatial_decomposition_count - level), plus that same difference
// and `extra` (declared on a line not in this listing). The first block row
// starts at 0.
4601 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4602 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
// Same half-block shift as above, applied at this subband's scale.
4603 if (!(s->keyframe || s->avctx->debug&512)){
4604 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4605 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// Never go past the last row of the subband.
4607 start_y = FFMIN(b->height, start_y);
4608 end_y = FFMIN(b->height, end_y);
4610 if (start_y != end_y){
// Orientation 0: band[0][0] is decoded and correlated over a range shifted
// one row down (clamped to the band height), then dequantized over
// [start_y, end_y).
4611 if (orientation == 0){
4612 SubBand * correlate_band = &p->band[0][0];
4613 int correlate_end_y = FFMIN(b->height, end_y + 1);
4614 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4615 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4616 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4617 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4620 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
// Advance the buffered inverse DWT four rows at a time up to slice_h.
4625 for(; yd<slice_h; yd+=4){
4626 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
// With the lossless qlog, every reconstructed sample is shifted left by
// FRAC_BITS before prediction is applied.
4629 if(s->qlog == LOSSLESS_QLOG){
4630 for(; yq<slice_h && yq<h; yq++){
4631 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4633 line[x] <<= FRAC_BITS;
4638 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// Release slice-buffer lines of the slice window, clamped to the plane
// height (the loop header on line 4642 is not in this listing).
4640 y = FFMIN(p->height, slice_starty);
4641 end_y = FFMIN(p->height, slice_h);
4643 slice_buffer_release(&s->sb, y++);
4646 slice_buffer_flush(&s->sb);
4653 release_buffer(avctx);
// Output current_picture unless debug bit 2048 is set; the second assignment
// below is the alternative for that case (its `else` line is not in this
// listing).
4655 if(!(s->avctx->debug&2048))
4656 *picture= s->current_picture;
4658 *picture= s->mconly_picture;
4660 *data_size = sizeof(AVFrame);
// Number of bytes the range decoder has advanced through the packet.
4662 bytes_read= c->bytestream - c->bytestream_start;
4663 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown.
 *
 * Visible in this listing: the SnowContext is fetched from avctx->priv_data
 * and its slice buffer is destroyed.
 *
 * NOTE(review): lines 4669, 4671 and everything after 4672 are not part of
 * this listing, so any further cleanup and the return value cannot be
 * confirmed from here.
 *
 * @param avctx codec context being closed
 */
4668 static av_cold int decode_end(AVCodecContext *avctx)
4670 SnowContext *s = avctx->priv_data;
4672 slice_buffer_destroy(&s->sb);
/**
 * Codec table entry for the Snow decoder.
 *
 * The initializer is partly positional and most of its lines are not part of
 * this listing (numbering gaps 4680-4682, 4684-4687, 4689, 4691 onwards).
 * NOTE(review): sizeof(SnowContext) is presumably the private-context size
 * and the literal 0 presumably the capabilities field (both candidate flags
 * are commented out) - confirm against the AVCodec declaration.
 */
4679 AVCodec snow_decoder = {
4683 sizeof(SnowContext),
4688 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4690 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4693 #if CONFIG_SNOW_ENCODER
/**
 * Codec table entry for the Snow encoder; the preceding line shows it is
 * compiled only under CONFIG_SNOW_ENCODER.
 *
 * The initializer is partly positional and most of its lines are not part of
 * this listing (numbering gaps 4695-4697, 4699-4701, 4703 onwards).
 * NOTE(review): sizeof(SnowContext) is presumably the private-context size -
 * confirm against the AVCodec declaration.
 */
4694 AVCodec snow_encoder = {
4698 sizeof(SnowContext),
4702 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4712 #include "libavutil/lfg.h"
4717 int buffer[2][width*height];
4721 s.spatial_decomposition_count=6;
4722 s.spatial_decomposition_type=1;
4724 av_lfg_init(&prng, 1);
4726 printf("testing 5/3 DWT\n");
4727 for(i=0; i<width*height; i++)
4728 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4730 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4731 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4733 for(i=0; i<width*height; i++)
4734 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4736 printf("testing 9/7 DWT\n");
4737 s.spatial_decomposition_type=0;
4738 for(i=0; i<width*height; i++)
4739 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4741 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4742 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744 for(i=0; i<width*height; i++)
4745 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4748 printf("testing AC coder\n");
4749 memset(s.header_state, 0, sizeof(s.header_state));
4750 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4751 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4753 for(i=-256; i<256; i++){
4754 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4756 ff_rac_terminate(&s.c);
4758 memset(s.header_state, 0, sizeof(s.header_state));
4759 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4760 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4762 for(i=-256; i<256; i++){
4764 j= get_symbol(&s.c, s.header_state, 1);
4765 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4769 int level, orientation, x, y;
4770 int64_t errors[8][4];
4773 memset(errors, 0, sizeof(errors));
4774 s.spatial_decomposition_count=3;
4775 s.spatial_decomposition_type=0;
4776 for(level=0; level<s.spatial_decomposition_count; level++){
4777 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4778 int w= width >> (s.spatial_decomposition_count-level);
4779 int h= height >> (s.spatial_decomposition_count-level);
4780 int stride= width << (s.spatial_decomposition_count-level);
4781 DWTELEM *buf= buffer[0];
4784 if(orientation&1) buf+=w;
4785 if(orientation>1) buf+=stride>>1;
4787 memset(buffer[0], 0, sizeof(int)*width*height);
4788 buf[w/2 + h/2*stride]= 256*256;
4789 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4790 for(y=0; y<height; y++){
4791 for(x=0; x<width; x++){
4792 int64_t d= buffer[0][x + y*width];
4794 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4796 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4798 error= (int)(sqrt(error)+0.5);
4799 errors[level][orientation]= error;
4800 if(g) g=av_gcd(g, error);
4804 printf("static int const visual_weight[][4]={\n");
4805 for(level=0; level<s.spatial_decomposition_count; level++){
4807 for(orientation=0; orientation<4; orientation++){
4808 printf("%8"PRId64",", errors[level][orientation]/g);
4815 int w= width >> (s.spatial_decomposition_count-level);
4816 //int h= height >> (s.spatial_decomposition_count-level);
4817 int stride= width << (s.spatial_decomposition_count-level);
4818 DWTELEM *buf= buffer[0];
4824 memset(buffer[0], 0, sizeof(int)*width*height);
4826 for(y=0; y<height; y++){
4827 for(x=0; x<width; x++){
4828 int tab[4]={0,2,3,1};
4829 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4832 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4836 buf[x + y*stride ]=169;
4837 buf[x + y*stride-w]=64;
4840 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4842 for(y=0; y<height; y++){
4843 for(x=0; x<width; x++){
4844 int64_t d= buffer[0][x + y*width];
4846 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4848 if(FFABS(height/2-y)<9) printf("\n");