2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
399 typedef struct x_and_coeff{
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
468 int spatial_scalability;
478 #define QBIAS_SHIFT 3
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
504 static void iterative_me(SnowContext *s);
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
517 for (i = 0; i < max_allocated_lines; i++)
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
522 buf->data_stack_top = max_allocated_lines - 1;
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
530 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
532 assert(buf->data_stack_top >= 0);
533 // assert(!buf->line[line]);
535 return buf->line[line];
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
547 static void slice_buffer_release(slice_buffer * buf, int line)
552 assert(line >= 0 && line < buf->line_count);
553 assert(buf->line[line]);
555 offset = buf->line_width * line;
556 buffer = buf->line[line];
557 buf->data_stack_top++;
558 buf->data_stack[buf->data_stack_top] = buffer;
559 buf->line[line] = NULL;
561 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
564 static void slice_buffer_flush(slice_buffer * buf)
567 for (i = 0; i < buf->line_count; i++)
571 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
572 slice_buffer_release(buf, i);
577 static void slice_buffer_destroy(slice_buffer * buf)
580 slice_buffer_flush(buf);
582 for (i = buf->data_count - 1; i >= 0; i--)
584 av_freep(&buf->data_stack[i]);
586 av_freep(&buf->data_stack);
587 av_freep(&buf->line);
591 // Avoid a name clash on SGI IRIX
594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
595 static uint8_t qexp[QROOT];
597 static inline int mirror(int v, int m){
598 while((unsigned)v > (unsigned)m){
605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
609 const int a= FFABS(v);
610 const int e= av_log2(a);
612 const int el= FFMIN(e, 10);
613 put_rac(c, state+0, 0);
616 put_rac(c, state+1+i, 1); //1..10
619 put_rac(c, state+1+9, 1); //1..10
621 put_rac(c, state+1+FFMIN(i,9), 0);
623 for(i=e-1; i>=el; i--){
624 put_rac(c, state+22+9, (a>>i)&1); //22..31
627 put_rac(c, state+22+i, (a>>i)&1); //22..31
631 put_rac(c, state+11 + el, v < 0); //11..21
634 put_rac(c, state+0, 0);
637 put_rac(c, state+1+i, 1); //1..10
639 put_rac(c, state+1+i, 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+i, (a>>i)&1); //22..31
646 put_rac(c, state+11 + e, v < 0); //11..21
649 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
651 put_rac(c, state+1+FFMIN(i,9), 0);
653 for(i=e-1; i>=0; i--){
654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
658 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
662 put_rac(c, state+0, 1);
666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
667 if(get_rac(c, state+0))
672 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
677 for(i=e-1; i>=0; i--){
678 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
690 int r= log2>=0 ? 1<<log2 : 1;
696 put_rac(c, state+4+log2, 1);
701 put_rac(c, state+4+log2, 0);
703 for(i=log2-1; i>=0; i--){
704 put_rac(c, state+31-i, (v>>i)&1);
708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
710 int r= log2>=0 ? 1<<log2 : 1;
715 while(get_rac(c, state+4+log2)){
721 for(i=log2-1; i>=0; i--){
722 v+= get_rac(c, state+31-i)<<i;
728 static av_always_inline void
729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
730 int dst_step, int src_step, int ref_step,
731 int width, int mul, int add, int shift,
732 int highpass, int inverse){
733 const int mirror_left= !highpass;
734 const int mirror_right= (width&1) ^ highpass;
735 const int w= (width>>1) - 1 + (highpass & width);
738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
747 LIFT(src[i*src_step],
748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
754 LIFT(src[w*src_step],
755 ((mul*2*ref[w*ref_step]+add)>>shift),
760 static av_always_inline void
761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
762 int dst_step, int src_step, int ref_step,
763 int width, int mul, int add, int shift,
764 int highpass, int inverse){
765 const int mirror_left= !highpass;
766 const int mirror_right= (width&1) ^ highpass;
767 const int w= (width>>1) - 1 + (highpass & width);
770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
779 LIFT(src[i*src_step],
780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
786 LIFT(src[w*src_step],
787 ((mul*2*ref[w*ref_step]+add)>>shift),
793 static av_always_inline void
794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
795 int dst_step, int src_step, int ref_step,
796 int width, int mul, int add, int shift,
797 int highpass, int inverse){
798 const int mirror_left= !highpass;
799 const int mirror_right= (width&1) ^ highpass;
800 const int w= (width>>1) - 1 + (highpass & width);
804 #define LIFTS(src, ref, inv) \
806 (src) + (((ref) + 4*(src))>>shift): \
807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
816 LIFTS(src[i*src_step],
817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
826 static av_always_inline void
827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
828 int dst_step, int src_step, int ref_step,
829 int width, int mul, int add, int shift,
830 int highpass, int inverse){
831 const int mirror_left= !highpass;
832 const int mirror_right= (width&1) ^ highpass;
833 const int w= (width>>1) - 1 + (highpass & width);
837 #define LIFTS(src, ref, inv) \
839 (src) + (((ref) + 4*(src))>>shift): \
840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
849 LIFTS(src[i*src_step],
850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
861 static void horizontal_decompose53i(DWTELEM *b, int width){
863 const int width2= width>>1;
865 const int w2= (width+1)>>1;
867 for(x=0; x<width2; x++){
869 temp[x+w2]= b[2*x + 1];
883 for(x=1; x+1<width2; x+=2){
887 A2 += (A1 + A3 + 2)>>2;
891 A1= temp[x+1+width2];
894 A4 += (A1 + A3 + 2)>>2;
900 A2 += (A1 + A3 + 2)>>2;
905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
913 for(i=0; i<width; i++){
914 b1[i] -= (b0[i] + b2[i])>>1;
918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
921 for(i=0; i<width; i++){
922 b1[i] += (b0[i] + b2[i] + 2)>>2;
926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
931 for(y=-2; y<height; y+=2){
932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
938 STOP_TIMER("horizontal_decompose53i")}
941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
943 STOP_TIMER("vertical_decompose53i*")}
950 static void horizontal_decompose97i(DWTELEM *b, int width){
952 const int w2= (width+1)>>1;
954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
964 for(i=0; i<width; i++){
965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
972 for(i=0; i<width; i++){
973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
980 for(i=0; i<width; i++){
982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
992 for(i=0; i<width; i++){
993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1004 for(y=-4; y<height; y+=2){
1005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1012 STOP_TIMER("horizontal_decompose97i")
1016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1022 STOP_TIMER("vertical_decompose97i")
1032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1035 for(level=0; level<decomposition_count; level++){
1037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1043 static void horizontal_compose53i(IDWTELEM *b, int width){
1044 IDWTELEM temp[width];
1045 const int width2= width>>1;
1046 const int w2= (width+1)>>1;
1058 for(x=1; x+1<width2; x+=2){
1062 A2 += (A1 + A3 + 2)>>2;
1066 A1= temp[x+1+width2];
1069 A4 += (A1 + A3 + 2)>>2;
1075 A2 += (A1 + A3 + 2)>>2;
1079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1082 for(x=0; x<width2; x++){
1084 b[2*x + 1]= temp[x+w2];
1090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1093 for(i=0; i<width; i++){
1094 b1[i] += (b0[i] + b2[i])>>1;
1098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1101 for(i=0; i<width; i++){
1102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1121 IDWTELEM *b0= cs->b0;
1122 IDWTELEM *b1= cs->b1;
1123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1129 STOP_TIMER("vertical_compose53i*")}
1132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1134 STOP_TIMER("horizontal_compose53i")}
1141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1143 IDWTELEM *b0= cs->b0;
1144 IDWTELEM *b1= cs->b1;
1145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1151 STOP_TIMER("vertical_compose53i*")}
1154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1156 STOP_TIMER("horizontal_compose53i")}
1163 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1165 spatial_compose53i_init(&cs, buffer, height, stride);
1166 while(cs.y <= height)
1167 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1171 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1172 IDWTELEM temp[width];
1173 const int w2= (width+1)>>1;
1175 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1176 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1177 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1178 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1181 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1184 for(i=0; i<width; i++){
1185 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1189 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1192 for(i=0; i<width; i++){
1193 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1197 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1200 for(i=0; i<width; i++){
1202 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1204 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1209 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1212 for(i=0; i<width; i++){
1213 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1217 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1220 for(i=0; i<width; i++){
1221 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1222 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1224 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1226 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1228 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1232 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1233 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1234 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1235 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1236 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1240 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1241 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1242 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1243 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1244 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1248 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1251 IDWTELEM *b0= cs->b0;
1252 IDWTELEM *b1= cs->b1;
1253 IDWTELEM *b2= cs->b2;
1254 IDWTELEM *b3= cs->b3;
1255 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1256 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1259 if(y>0 && y+4<height){
1260 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1268 STOP_TIMER("vertical_compose97i")}}
1271 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1272 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1273 if(width>400 && y+0<(unsigned)height){
1274 STOP_TIMER("horizontal_compose97i")}}
1283 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1285 IDWTELEM *b0= cs->b0;
1286 IDWTELEM *b1= cs->b1;
1287 IDWTELEM *b2= cs->b2;
1288 IDWTELEM *b3= cs->b3;
1289 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1290 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1293 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1294 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1295 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1296 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1298 STOP_TIMER("vertical_compose97i")}}
1301 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1302 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1303 if(width>400 && b0 <= b2){
1304 STOP_TIMER("horizontal_compose97i")}}
1313 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1315 spatial_compose97i_init(&cs, buffer, height, stride);
1316 while(cs.y <= height)
1317 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1320 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1322 for(level=decomposition_count-1; level>=0; level--){
1324 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1330 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1332 for(level=decomposition_count-1; level>=0; level--){
1334 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1335 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1340 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1341 const int support = type==1 ? 3 : 5;
1345 for(level=decomposition_count-1; level>=0; level--){
1346 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1348 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1350 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1357 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1358 const int support = type==1 ? 3 : 5;
1362 for(level=decomposition_count-1; level>=0; level--){
1363 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1365 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1367 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1374 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1375 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1377 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1378 for(y=0; y<height; y+=4)
1379 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1382 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1383 const int w= b->width;
1384 const int h= b->height;
1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1397 v= src[x + y*stride];
1400 t= src[x + (y-1)*stride];
1402 lt= src[x - 1 + (y-1)*stride];
1405 rt= src[x + 1 + (y-1)*stride];
1409 l= src[x - 1 + y*stride];
1411 if(orientation==1) ll= src[y + (x-2)*stride];
1412 else ll= src[x - 2 + y*stride];
1418 if(px<b->parent->width && py<b->parent->height)
1419 p= parent[px + py*2*stride];
1421 if(!(/*ll|*/l|lt|t|rt|p)){
1423 runs[run_index++]= run;
1431 max_index= run_index;
1432 runs[run_index++]= run;
1434 run= runs[run_index++];
1436 put_symbol2(&s->c, b->state[30], max_index, 0);
1437 if(run_index <= max_index)
1438 put_symbol2(&s->c, b->state[1], run, 3);
1441 if(s->c.bytestream_end - s->c.bytestream < w*40){
1442 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1447 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1448 v= src[x + y*stride];
1451 t= src[x + (y-1)*stride];
1453 lt= src[x - 1 + (y-1)*stride];
1456 rt= src[x + 1 + (y-1)*stride];
1460 l= src[x - 1 + y*stride];
1462 if(orientation==1) ll= src[y + (x-2)*stride];
1463 else ll= src[x - 2 + y*stride];
1469 if(px<b->parent->width && py<b->parent->height)
1470 p= parent[px + py*2*stride];
1472 if(/*ll|*/l|lt|t|rt|p){
1473 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1475 put_rac(&s->c, &b->state[0][context], !!v);
1478 run= runs[run_index++];
1480 if(run_index <= max_index)
1481 put_symbol2(&s->c, b->state[1], run, 3);
1489 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1490 int l2= 2*FFABS(l) + (l<0);
1491 int t2= 2*FFABS(t) + (t<0);
1493 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1494 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1502 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1503 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1504 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1505 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1506 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1509 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1510 const int w= b->width;
1511 const int h= b->height;
1516 x_and_coeff *xc= b->x_coeff;
1517 x_and_coeff *prev_xc= NULL;
1518 x_and_coeff *prev2_xc= xc;
1519 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1520 x_and_coeff *prev_parent_xc= parent_xc;
1522 runs= get_symbol2(&s->c, b->state[30], 0);
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1528 int lt=0, t=0, rt=0;
1530 if(y && prev_xc->x == 0){
1542 if(prev_xc->x == x + 1)
1548 if(x>>1 > parent_xc->x){
1551 if(x>>1 == parent_xc->x){
1552 p= parent_xc->coeff;
1555 if(/*ll|*/l|lt|t|rt|p){
1556 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1558 v=get_rac(&s->c, &b->state[0][context]);
1560 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1561 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1568 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1570 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1571 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1580 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1581 else max_run= FFMIN(run, w-x-1);
1583 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1589 (xc++)->x= w+1; //end marker
1595 while(parent_xc->x != parent->width+1)
1598 prev_parent_xc= parent_xc;
1600 parent_xc= prev_parent_xc;
1605 (xc++)->x= w+1; //end marker
1609 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1610 const int w= b->width;
1612 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1613 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1614 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1619 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1624 /* If we are on the second or later slice, restore our index. */
1626 new_index = save_state[0];
1629 for(y=start_y; y<h; y++){
1632 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1633 memset(line, 0, b->width*sizeof(IDWTELEM));
1634 v = b->x_coeff[new_index].coeff;
1635 x = b->x_coeff[new_index++].x;
1638 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1639 register int u= -(v&1);
1640 line[x] = (t^u) - u;
1642 v = b->x_coeff[new_index].coeff;
1643 x = b->x_coeff[new_index++].x;
1646 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1647 STOP_TIMER("decode_subband")
1650 /* Save our variables for the next slice. */
1651 save_state[0] = new_index;
1656 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1657 int plane_index, level, orientation;
1659 for(plane_index=0; plane_index<3; plane_index++){
1660 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1661 for(orientation=level ? 1:0; orientation<4; orientation++){
1662 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1666 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1667 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1670 static int alloc_blocks(SnowContext *s){
1671 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1672 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1677 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1681 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1682 uint8_t *bytestream= d->bytestream;
1683 uint8_t *bytestream_start= d->bytestream_start;
1685 d->bytestream= bytestream;
1686 d->bytestream_start= bytestream_start;
1689 //near copy & paste from dsputil, FIXME
1690 static int pix_sum(uint8_t * pix, int line_size, int w)
1695 for (i = 0; i < w; i++) {
1696 for (j = 0; j < w; j++) {
1700 pix += line_size - w;
1705 //near copy & paste from dsputil, FIXME
1706 static int pix_norm1(uint8_t * pix, int line_size, int w)
1709 uint32_t *sq = ff_squareTbl + 256;
1712 for (i = 0; i < w; i++) {
1713 for (j = 0; j < w; j ++) {
1717 pix += line_size - w;
1722 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1723 const int w= s->b_width << s->block_max_depth;
1724 const int rem_depth= s->block_max_depth - level;
1725 const int index= (x + y*w) << rem_depth;
1726 const int block_w= 1<<rem_depth;
1739 for(j=0; j<block_w; j++){
1740 for(i=0; i<block_w; i++){
1741 s->block[index + i + j*w]= block;
1746 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1747 const int offset[3]= {
1749 ((y*c->uvstride + x)>>1),
1750 ((y*c->uvstride + x)>>1),
1754 c->src[0][i]= src [i];
1755 c->ref[0][i]= ref [i] + offset[i];
1760 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1761 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1762 if(s->ref_frames == 1){
1763 *mx = mid_pred(left->mx, top->mx, tr->mx);
1764 *my = mid_pred(left->my, top->my, tr->my);
1766 const int *scale = scale_mv_ref[ref];
1767 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1768 (top ->mx * scale[top ->ref] + 128) >>8,
1769 (tr ->mx * scale[tr ->ref] + 128) >>8);
1770 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1771 (top ->my * scale[top ->ref] + 128) >>8,
1772 (tr ->my * scale[tr ->ref] + 128) >>8);
1779 #define P_TOPRIGHT P[3]
1780 #define P_MEDIAN P[4]
1782 #define FLAG_QPEL 1 //must be 1
1784 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1785 uint8_t p_buffer[1024];
1786 uint8_t i_buffer[1024];
1787 uint8_t p_state[sizeof(s->block_state)];
1788 uint8_t i_state[sizeof(s->block_state)];
1790 uint8_t *pbbak= s->c.bytestream;
1791 uint8_t *pbbak_start= s->c.bytestream_start;
1792 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1793 const int w= s->b_width << s->block_max_depth;
1794 const int h= s->b_height << s->block_max_depth;
1795 const int rem_depth= s->block_max_depth - level;
1796 const int index= (x + y*w) << rem_depth;
1797 const int block_w= 1<<(LOG2_MB_SIZE - level);
1798 int trx= (x+1)<<rem_depth;
1799 int try= (y+1)<<rem_depth;
1800 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1801 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1802 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1803 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1804 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1805 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1806 int pl = left->color[0];
1807 int pcb= left->color[1];
1808 int pcr= left->color[2];
1812 const int stride= s->current_picture.linesize[0];
1813 const int uvstride= s->current_picture.linesize[1];
1814 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1815 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1816 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1818 int16_t last_mv[3][2];
1819 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1820 const int shift= 1+qpel;
1821 MotionEstContext *c= &s->m.me;
1822 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1823 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1824 int my_context= av_log2(2*FFABS(left->my - top->my));
1825 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1826 int ref, best_ref, ref_score, ref_mx, ref_my;
1828 assert(sizeof(s->block_state) >= 256);
1830 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1834 // clip predictors / edge ?
1836 P_LEFT[0]= left->mx;
1837 P_LEFT[1]= left->my;
1840 P_TOPRIGHT[0]= tr->mx;
1841 P_TOPRIGHT[1]= tr->my;
1843 last_mv[0][0]= s->block[index].mx;
1844 last_mv[0][1]= s->block[index].my;
1845 last_mv[1][0]= right->mx;
1846 last_mv[1][1]= right->my;
1847 last_mv[2][0]= bottom->mx;
1848 last_mv[2][1]= bottom->my;
1855 assert(c-> stride == stride);
1856 assert(c->uvstride == uvstride);
1858 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1859 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1860 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1861 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1863 c->xmin = - x*block_w - 16+2;
1864 c->ymin = - y*block_w - 16+2;
1865 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1868 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1869 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1870 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1871 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1872 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1873 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1874 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1876 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1877 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1880 c->pred_x= P_LEFT[0];
1881 c->pred_y= P_LEFT[1];
1883 c->pred_x = P_MEDIAN[0];
1884 c->pred_y = P_MEDIAN[1];
1889 for(ref=0; ref<s->ref_frames; ref++){
1890 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1892 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1893 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1895 assert(ref_mx >= c->xmin);
1896 assert(ref_mx <= c->xmax);
1897 assert(ref_my >= c->ymin);
1898 assert(ref_my <= c->ymax);
1900 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1901 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1902 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1903 if(s->ref_mvs[ref]){
1904 s->ref_mvs[ref][index][0]= ref_mx;
1905 s->ref_mvs[ref][index][1]= ref_my;
1906 s->ref_scores[ref][index]= ref_score;
1908 if(score > ref_score){
1915 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1918 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1920 pc.bytestream_start=
1921 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1922 memcpy(p_state, s->block_state, sizeof(s->block_state));
1924 if(level!=s->block_max_depth)
1925 put_rac(&pc, &p_state[4 + s_context], 1);
1926 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1927 if(s->ref_frames > 1)
1928 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1929 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1930 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1931 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1932 p_len= pc.bytestream - pc.bytestream_start;
1933 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1935 block_s= block_w*block_w;
1936 sum = pix_sum(current_data[0], stride, block_w);
1937 l= (sum + block_s/2)/block_s;
1938 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1940 block_s= block_w*block_w>>2;
1941 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1942 cb= (sum + block_s/2)/block_s;
1943 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1944 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1945 cr= (sum + block_s/2)/block_s;
1946 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1949 ic.bytestream_start=
1950 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1951 memcpy(i_state, s->block_state, sizeof(s->block_state));
1952 if(level!=s->block_max_depth)
1953 put_rac(&ic, &i_state[4 + s_context], 1);
1954 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1955 put_symbol(&ic, &i_state[32], l-pl , 1);
1956 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1957 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1958 i_len= ic.bytestream - ic.bytestream_start;
1959 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1961 // assert(score==256*256*256*64-1);
1962 assert(iscore < 255*255*256 + s->lambda2*10);
1963 assert(iscore >= 0);
1964 assert(l>=0 && l<=255);
1965 assert(pl>=0 && pl<=255);
1968 int varc= iscore >> 8;
1969 int vard= score >> 8;
1970 if (vard <= 64 || vard < varc)
1971 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1973 c->scene_change_score+= s->m.qscale;
1976 if(level!=s->block_max_depth){
1977 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1978 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1979 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1980 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1982 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1984 if(score2 < score && score2 < iscore)
1989 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1990 memcpy(pbbak, i_buffer, i_len);
1992 s->c.bytestream_start= pbbak_start;
1993 s->c.bytestream= pbbak + i_len;
1994 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1995 memcpy(s->block_state, i_state, sizeof(s->block_state));
1998 memcpy(pbbak, p_buffer, p_len);
2000 s->c.bytestream_start= pbbak_start;
2001 s->c.bytestream= pbbak + p_len;
2002 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2003 memcpy(s->block_state, p_state, sizeof(s->block_state));
2008 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2009 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2010 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2012 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2016 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2017 const int w= s->b_width << s->block_max_depth;
2018 const int rem_depth= s->block_max_depth - level;
2019 const int index= (x + y*w) << rem_depth;
2020 int trx= (x+1)<<rem_depth;
2021 BlockNode *b= &s->block[index];
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2026 int pl = left->color[0];
2027 int pcb= left->color[1];
2028 int pcr= left->color[2];
2030 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2031 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2032 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2033 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2036 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2040 if(level!=s->block_max_depth){
2041 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2042 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2044 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2045 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2046 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2047 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2048 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2052 if(b->type & BLOCK_INTRA){
2053 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2055 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2056 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2057 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2058 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2060 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2061 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2062 if(s->ref_frames > 1)
2063 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2064 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2065 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2066 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2070 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2071 const int w= s->b_width << s->block_max_depth;
2072 const int rem_depth= s->block_max_depth - level;
2073 const int index= (x + y*w) << rem_depth;
2074 int trx= (x+1)<<rem_depth;
2075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2077 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2078 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2079 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2082 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2086 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2088 int l = left->color[0];
2089 int cb= left->color[1];
2090 int cr= left->color[2];
2092 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2093 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2094 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2096 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2099 pred_mv(s, &mx, &my, 0, left, top, tr);
2100 l += get_symbol(&s->c, &s->block_state[32], 1);
2101 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2102 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2104 if(s->ref_frames > 1)
2105 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2106 pred_mv(s, &mx, &my, ref, left, top, tr);
2107 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2108 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2110 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2112 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2113 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2114 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2115 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2119 static void encode_blocks(SnowContext *s, int search){
2124 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2128 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2129 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2133 if(s->avctx->me_method == ME_ITER || !search)
2134 encode_q_branch2(s, 0, x, y);
2136 encode_q_branch (s, 0, x, y);
2141 static void decode_blocks(SnowContext *s){
2148 decode_q_branch(s, 0, x, y);
2153 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2154 const static uint8_t weight[64]={
2165 const static uint8_t brane[256]={
2166 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2167 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2168 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2169 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2170 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2171 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2172 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2173 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2174 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2175 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2176 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2177 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2178 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2179 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2180 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2181 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2184 const static uint8_t needs[16]={
2192 int16_t tmpIt [64*(32+HTAPS_MAX)];
2193 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2194 int16_t *tmpI= tmpIt;
2195 uint8_t *tmp2= tmp2t[0];
2198 assert(dx<16 && dy<16);
2199 r= brane[dx + 16*dy]&15;
2200 l= brane[dx + 16*dy]>>4;
2202 b= needs[l] | needs[r];
2203 if(p && !p->diag_mc)
2207 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2208 for(x=0; x < b_w; x++){
2209 int a_1=src[x + HTAPS_MAX/2-4];
2210 int a0= src[x + HTAPS_MAX/2-3];
2211 int a1= src[x + HTAPS_MAX/2-2];
2212 int a2= src[x + HTAPS_MAX/2-1];
2213 int a3= src[x + HTAPS_MAX/2+0];
2214 int a4= src[x + HTAPS_MAX/2+1];
2215 int a5= src[x + HTAPS_MAX/2+2];
2216 int a6= src[x + HTAPS_MAX/2+3];
2218 if(!p || p->fast_mc){
2219 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2223 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2228 if(am&(~255)) am= ~(am>>31);
2237 src += HTAPS_MAX/2 - 1;
2241 for(y=0; y < b_h; y++){
2242 for(x=0; x < b_w+1; x++){
2243 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2244 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2245 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2246 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2247 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2248 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2249 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2250 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2252 if(!p || p->fast_mc)
2253 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2255 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2257 if(am&(~255)) am= ~(am>>31);
2265 src += stride*(HTAPS_MAX/2 - 1);
2269 for(y=0; y < b_h; y++){
2270 for(x=0; x < b_w; x++){
2271 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2272 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2273 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2274 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2275 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2276 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2277 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2278 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2280 if(!p || p->fast_mc)
2281 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2283 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2284 if(am&(~255)) am= ~(am>>31);
2293 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2298 hpel[ 6]= tmp2t[1] + 1;
2300 hpel[ 8]= src + stride;
2301 hpel[ 9]= hpel[1] + stride;
2302 hpel[10]= hpel[8] + 1;
2305 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2306 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2307 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2308 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2311 for(y=0; y < b_h; y++){
2312 for(x=0; x < b_w; x++){
2313 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2314 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2323 uint8_t *src1= hpel[l];
2324 uint8_t *src2= hpel[r];
2325 int a= weight[((dx&7) + (8*(dy&7)))];
2327 for(y=0; y < b_h; y++){
2328 for(x=0; x < b_w; x++){
2329 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2336 STOP_TIMER("mc_block")
2339 #define mca(dx,dy,b_w)\
2340 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2341 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2343 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2355 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2356 if(block->type & BLOCK_INTRA){
2358 const int color = block->color[plane_index];
2359 const int color4= color*0x01010101;
2361 for(y=0; y < b_h; y++){
2362 *(uint32_t*)&dst[0 + y*stride]= color4;
2363 *(uint32_t*)&dst[4 + y*stride]= color4;
2364 *(uint32_t*)&dst[8 + y*stride]= color4;
2365 *(uint32_t*)&dst[12+ y*stride]= color4;
2366 *(uint32_t*)&dst[16+ y*stride]= color4;
2367 *(uint32_t*)&dst[20+ y*stride]= color4;
2368 *(uint32_t*)&dst[24+ y*stride]= color4;
2369 *(uint32_t*)&dst[28+ y*stride]= color4;
2372 for(y=0; y < b_h; y++){
2373 *(uint32_t*)&dst[0 + y*stride]= color4;
2374 *(uint32_t*)&dst[4 + y*stride]= color4;
2375 *(uint32_t*)&dst[8 + y*stride]= color4;
2376 *(uint32_t*)&dst[12+ y*stride]= color4;
2379 for(y=0; y < b_h; y++){
2380 *(uint32_t*)&dst[0 + y*stride]= color4;
2381 *(uint32_t*)&dst[4 + y*stride]= color4;
2384 for(y=0; y < b_h; y++){
2385 *(uint32_t*)&dst[0 + y*stride]= color4;
2388 for(y=0; y < b_h; y++){
2389 for(x=0; x < b_w; x++){
2390 dst[x + y*stride]= color;
2395 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2396 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2397 int mx= block->mx*scale;
2398 int my= block->my*scale;
2399 const int dx= mx&15;
2400 const int dy= my&15;
2401 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2402 sx += (mx>>4) - (HTAPS_MAX/2-1);
2403 sy += (my>>4) - (HTAPS_MAX/2-1);
2404 src += sx + sy*stride;
2405 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2406 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2407 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2410 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2411 // assert(!(b_w&(b_w-1)));
2412 assert(b_w>1 && b_h>1);
2413 assert(tab_index>=0 && tab_index<4 || b_w==32);
2414 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2415 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2418 for(y=0; y<b_h; y+=16){
2419 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2420 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2423 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2424 else if(b_w==2*b_h){
2425 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2426 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2429 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2430 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2435 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2436 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2439 for(y=0; y<b_h; y++){
2440 //FIXME ugly misuse of obmc_stride
2441 const uint8_t *obmc1= obmc + y*obmc_stride;
2442 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2443 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2444 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2445 dst = slice_buffer_get_line(sb, src_y + y);
2446 for(x=0; x<b_w; x++){
2447 int v= obmc1[x] * block[3][x + y*src_stride]
2448 +obmc2[x] * block[2][x + y*src_stride]
2449 +obmc3[x] * block[1][x + y*src_stride]
2450 +obmc4[x] * block[0][x + y*src_stride];
2452 v <<= 8 - LOG2_OBMC_MAX;
2454 v >>= 8 - FRAC_BITS;
2457 v += dst[x + src_x];
2458 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2459 if(v&(~255)) v= ~(v>>31);
2460 dst8[x + y*src_stride] = v;
2462 dst[x + src_x] -= v;
2468 //FIXME name clenup (b_w, block_w, b_width stuff)
2469 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2470 const int b_width = s->b_width << s->block_max_depth;
2471 const int b_height= s->b_height << s->block_max_depth;
2472 const int b_stride= b_width;
2473 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2474 BlockNode *rt= lt+1;
2475 BlockNode *lb= lt+b_stride;
2476 BlockNode *rb= lb+1;
2478 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2479 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2486 }else if(b_x + 1 >= b_width){
2493 }else if(b_y + 1 >= b_height){
2498 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2501 if(!sliced && !offset_dst)
2504 }else if(src_x + b_w > w){
2508 obmc -= src_y*obmc_stride;
2510 if(!sliced && !offset_dst)
2511 dst -= src_y*dst_stride;
2513 }else if(src_y + b_h> h){
2517 if(b_w<=0 || b_h<=0) return;
2519 assert(src_stride > 2*MB_SIZE + 5);
2520 if(!sliced && offset_dst)
2521 dst += src_x + src_y*dst_stride;
2522 dst8+= src_x + src_y*src_stride;
2523 // src += src_x + src_y*src_stride;
2525 ptmp= tmp + 3*tmp_step;
2528 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2530 if(same_block(lt, rt)){
2535 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2538 if(same_block(lt, lb)){
2540 }else if(same_block(rt, lb)){
2545 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2548 if(same_block(lt, rb) ){
2550 }else if(same_block(rt, rb)){
2552 }else if(same_block(lb, rb)){
2556 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2559 for(y=0; y<b_h; y++){
2560 for(x=0; x<b_w; x++){
2561 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2562 if(add) dst[x + y*dst_stride] += v;
2563 else dst[x + y*dst_stride] -= v;
2566 for(y=0; y<b_h; y++){
2567 uint8_t *obmc2= obmc + (obmc_stride>>1);
2568 for(x=0; x<b_w; x++){
2569 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2570 if(add) dst[x + y*dst_stride] += v;
2571 else dst[x + y*dst_stride] -= v;
2574 for(y=0; y<b_h; y++){
2575 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2576 for(x=0; x<b_w; x++){
2577 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2578 if(add) dst[x + y*dst_stride] += v;
2579 else dst[x + y*dst_stride] -= v;
2582 for(y=0; y<b_h; y++){
2583 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2584 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2585 for(x=0; x<b_w; x++){
2586 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2587 if(add) dst[x + y*dst_stride] += v;
2588 else dst[x + y*dst_stride] -= v;
2595 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2596 STOP_TIMER("inner_add_yblock")
2598 for(y=0; y<b_h; y++){
2599 //FIXME ugly misuse of obmc_stride
2600 const uint8_t *obmc1= obmc + y*obmc_stride;
2601 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2602 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2603 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2604 for(x=0; x<b_w; x++){
2605 int v= obmc1[x] * block[3][x + y*src_stride]
2606 +obmc2[x] * block[2][x + y*src_stride]
2607 +obmc3[x] * block[1][x + y*src_stride]
2608 +obmc4[x] * block[0][x + y*src_stride];
2610 v <<= 8 - LOG2_OBMC_MAX;
2612 v >>= 8 - FRAC_BITS;
2615 v += dst[x + y*dst_stride];
2616 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2617 if(v&(~255)) v= ~(v>>31);
2618 dst8[x + y*src_stride] = v;
2620 dst[x + y*dst_stride] -= v;
2627 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2628 Plane *p= &s->plane[plane_index];
2629 const int mb_w= s->b_width << s->block_max_depth;
2630 const int mb_h= s->b_height << s->block_max_depth;
2632 int block_size = MB_SIZE >> s->block_max_depth;
2633 int block_w = plane_index ? block_size/2 : block_size;
2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2635 int obmc_stride= plane_index ? block_size : 2*block_size;
2636 int ref_stride= s->current_picture.linesize[plane_index];
2637 uint8_t *dst8= s->current_picture.data[plane_index];
2642 if(s->keyframe || (s->avctx->debug&512)){
2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2649 // DWTELEM * line = slice_buffer_get_line(sb, y);
2650 IDWTELEM * line = sb->line[y];
2653 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2656 if(v&(~255)) v= ~(v>>31);
2657 dst8[x + y*ref_stride]= v;
2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2663 // DWTELEM * line = slice_buffer_get_line(sb, y);
2664 IDWTELEM * line = sb->line[y];
2667 line[x] -= 128 << FRAC_BITS;
2668 // buf[x + y*w]-= 128<<FRAC_BITS;
2676 for(mb_x=0; mb_x<=mb_w; mb_x++){
2679 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2680 block_w*mb_x - block_w/2,
2681 block_w*mb_y - block_w/2,
2684 w, ref_stride, obmc_stride,
2686 add, 0, plane_index);
2688 STOP_TIMER("add_yblock")
2691 STOP_TIMER("predict_slice")
2694 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2695 Plane *p= &s->plane[plane_index];
2696 const int mb_w= s->b_width << s->block_max_depth;
2697 const int mb_h= s->b_height << s->block_max_depth;
2699 int block_size = MB_SIZE >> s->block_max_depth;
2700 int block_w = plane_index ? block_size/2 : block_size;
2701 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2702 const int obmc_stride= plane_index ? block_size : 2*block_size;
2703 int ref_stride= s->current_picture.linesize[plane_index];
2704 uint8_t *dst8= s->current_picture.data[plane_index];
2709 if(s->keyframe || (s->avctx->debug&512)){
2714 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2716 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2718 if(v&(~255)) v= ~(v>>31);
2719 dst8[x + y*ref_stride]= v;
2723 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2725 buf[x + y*w]-= 128<<FRAC_BITS;
2733 for(mb_x=0; mb_x<=mb_w; mb_x++){
2736 add_yblock(s, 0, NULL, buf, dst8, obmc,
2737 block_w*mb_x - block_w/2,
2738 block_w*mb_y - block_w/2,
2741 w, ref_stride, obmc_stride,
2743 add, 1, plane_index);
2745 STOP_TIMER("add_yblock")
2748 STOP_TIMER("predict_slice")
2751 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2752 const int mb_h= s->b_height << s->block_max_depth;
2754 for(mb_y=0; mb_y<=mb_h; mb_y++)
2755 predict_slice(s, buf, plane_index, add, mb_y);
2758 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2760 Plane *p= &s->plane[plane_index];
2761 const int block_size = MB_SIZE >> s->block_max_depth;
2762 const int block_w = plane_index ? block_size/2 : block_size;
2763 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2764 const int obmc_stride= plane_index ? block_size : 2*block_size;
2765 const int ref_stride= s->current_picture.linesize[plane_index];
2766 uint8_t *src= s-> input_picture.data[plane_index];
2767 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2768 const int b_stride = s->b_width << s->block_max_depth;
2769 const int w= p->width;
2770 const int h= p->height;
2771 int index= mb_x + mb_y*b_stride;
2772 BlockNode *b= &s->block[index];
2773 BlockNode backup= *b;
2777 b->type|= BLOCK_INTRA;
2778 b->color[plane_index]= 0;
2779 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2782 int mb_x2= mb_x + (i &1) - 1;
2783 int mb_y2= mb_y + (i>>1) - 1;
2784 int x= block_w*mb_x2 + block_w/2;
2785 int y= block_w*mb_y2 + block_w/2;
2787 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2788 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2790 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2791 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2792 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2793 int obmc_v= obmc[index];
2795 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2796 if(x<0) obmc_v += obmc[index + block_w];
2797 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2798 if(x+block_w>w) obmc_v += obmc[index - block_w];
2799 //FIXME precalc this or simplify it somehow else
2801 d = -dst[index] + (1<<(FRAC_BITS-1));
2803 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2804 aa += obmc_v * obmc_v; //FIXME precalclate this
2810 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2813 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2814 const int b_stride = s->b_width << s->block_max_depth;
2815 const int b_height = s->b_height<< s->block_max_depth;
2816 int index= x + y*b_stride;
2817 const BlockNode *b = &s->block[index];
2818 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2819 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2820 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2821 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2823 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2824 // int my_context= av_log2(2*FFABS(left->my - top->my));
2826 if(x<0 || x>=b_stride || y>=b_height)
2833 00001XXXX 15-30 8-15
2835 //FIXME try accurate rate
2836 //FIXME intra and inter predictors if surrounding blocks arent the same type
2837 if(b->type & BLOCK_INTRA){
2838 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2839 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2840 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2842 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2845 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2846 + av_log2(2*FFABS(dmy))
2847 + av_log2(2*b->ref));
2851 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2852 Plane *p= &s->plane[plane_index];
2853 const int block_size = MB_SIZE >> s->block_max_depth;
2854 const int block_w = plane_index ? block_size/2 : block_size;
2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
2856 const int ref_stride= s->current_picture.linesize[plane_index];
2857 uint8_t *dst= s->current_picture.data[plane_index];
2858 uint8_t *src= s-> input_picture.data[plane_index];
2859 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2860 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2861 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2862 const int b_stride = s->b_width << s->block_max_depth;
2863 const int b_height = s->b_height<< s->block_max_depth;
2864 const int w= p->width;
2865 const int h= p->height;
2868 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2869 int sx= block_w*mb_x - block_w/2;
2870 int sy= block_w*mb_y - block_w/2;
2871 int x0= FFMAX(0,-sx);
2872 int y0= FFMAX(0,-sy);
2873 int x1= FFMIN(block_w*2, w-sx);
2874 int y1= FFMIN(block_w*2, h-sy);
2877 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2879 for(y=y0; y<y1; y++){
2880 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2881 const IDWTELEM *pred1 = pred + y*obmc_stride;
2882 uint8_t *cur1 = cur + y*ref_stride;
2883 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2884 for(x=x0; x<x1; x++){
2885 #if FRAC_BITS >= LOG2_OBMC_MAX
2886 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2888 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2890 v = (v + pred1[x]) >> FRAC_BITS;
2891 if(v&(~255)) v= ~(v>>31);
2896 /* copy the regions where obmc[] = (uint8_t)256 */
2897 if(LOG2_OBMC_MAX == 8
2898 && (mb_x == 0 || mb_x == b_stride-1)
2899 && (mb_y == 0 || mb_y == b_height-1)){
2908 for(y=y0; y<y1; y++)
2909 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2913 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2914 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2915 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2916 * so improving the score of one block is not strictly guaranteed to
2917 * improve the score of the whole frame, so iterative motion est
2918 * doesn't always converge. */
2919 if(s->avctx->me_cmp == FF_CMP_W97)
2920 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2921 else if(s->avctx->me_cmp == FF_CMP_W53)
2922 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2926 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2927 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2932 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2941 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2943 if(mb_x == b_stride-2)
2944 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2946 return distortion + rate*penalty_factor;
2949 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2951 Plane *p= &s->plane[plane_index];
2952 const int block_size = MB_SIZE >> s->block_max_depth;
2953 const int block_w = plane_index ? block_size/2 : block_size;
2954 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2955 const int obmc_stride= plane_index ? block_size : 2*block_size;
2956 const int ref_stride= s->current_picture.linesize[plane_index];
2957 uint8_t *dst= s->current_picture.data[plane_index];
2958 uint8_t *src= s-> input_picture.data[plane_index];
2959 static const IDWTELEM zero_dst[4096]; //FIXME
2960 const int b_stride = s->b_width << s->block_max_depth;
2961 const int w= p->width;
2962 const int h= p->height;
2965 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2968 int mb_x2= mb_x + (i%3) - 1;
2969 int mb_y2= mb_y + (i/3) - 1;
2970 int x= block_w*mb_x2 + block_w/2;
2971 int y= block_w*mb_y2 + block_w/2;
2973 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2974 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2976 //FIXME find a cleaner/simpler way to skip the outside stuff
2977 for(y2= y; y2<0; y2++)
2978 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2979 for(y2= h; y2<y+block_w; y2++)
2980 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2982 for(y2= y; y2<y+block_w; y2++)
2983 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2986 for(y2= y; y2<y+block_w; y2++)
2987 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2990 assert(block_w== 8 || block_w==16);
2991 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2995 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2996 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3004 rate = get_block_bits(s, mb_x, mb_y, 2);
3005 for(i=merged?4:0; i<9; i++){
3006 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3007 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3010 return distortion + rate*penalty_factor;
3013 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3014 const int b_stride= s->b_width << s->block_max_depth;
3015 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3016 BlockNode backup= *block;
3017 int rd, index, value;
3019 assert(mb_x>=0 && mb_y>=0);
3020 assert(mb_x<b_stride);
3023 block->color[0] = p[0];
3024 block->color[1] = p[1];
3025 block->color[2] = p[2];
3026 block->type |= BLOCK_INTRA;
3028 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3029 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3030 if(s->me_cache[index] == value)
3032 s->me_cache[index]= value;
3036 block->type &= ~BLOCK_INTRA;
3039 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3051 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3052 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3053 int p[2] = {p0, p1};
3054 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3057 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3058 const int b_stride= s->b_width << s->block_max_depth;
3059 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3060 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3061 int rd, index, value;
3063 assert(mb_x>=0 && mb_y>=0);
3064 assert(mb_x<b_stride);
3065 assert(((mb_x|mb_y)&1) == 0);
3067 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3068 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3069 if(s->me_cache[index] == value)
3071 s->me_cache[index]= value;
3076 block->type &= ~BLOCK_INTRA;
3077 block[1]= block[b_stride]= block[b_stride+1]= *block;
3079 rd= get_4block_rd(s, mb_x, mb_y, 0);
3086 block[0]= backup[0];
3087 block[1]= backup[1];
3088 block[b_stride]= backup[2];
3089 block[b_stride+1]= backup[3];
3094 static void iterative_me(SnowContext *s){
3095 int pass, mb_x, mb_y;
3096 const int b_width = s->b_width << s->block_max_depth;
3097 const int b_height= s->b_height << s->block_max_depth;
3098 const int b_stride= b_width;
3102 RangeCoder r = s->c;
3103 uint8_t state[sizeof(s->block_state)];
3104 memcpy(state, s->block_state, sizeof(s->block_state));
3105 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3106 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3107 encode_q_branch(s, 0, mb_x, mb_y);
3109 memcpy(s->block_state, state, sizeof(s->block_state));
3112 for(pass=0; pass<25; pass++){
3115 for(mb_y= 0; mb_y<b_height; mb_y++){
3116 for(mb_x= 0; mb_x<b_width; mb_x++){
3117 int dia_change, i, j, ref;
3118 int best_rd= INT_MAX, ref_rd;
3119 BlockNode backup, ref_b;
3120 const int index= mb_x + mb_y * b_stride;
3121 BlockNode *block= &s->block[index];
3122 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3123 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3124 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3125 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3126 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3127 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3128 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3129 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3130 const int b_w= (MB_SIZE >> s->block_max_depth);
3131 uint8_t obmc_edged[b_w*2][b_w*2];
3133 if(pass && (block->type & BLOCK_OPT))
3135 block->type |= BLOCK_OPT;
3139 if(!s->me_cache_generation)
3140 memset(s->me_cache, 0, sizeof(s->me_cache));
3141 s->me_cache_generation += 1<<22;
3146 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3148 for(y=0; y<b_w*2; y++)
3149 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3150 if(mb_x==b_stride-1)
3151 for(y=0; y<b_w*2; y++)
3152 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3154 for(x=0; x<b_w*2; x++)
3155 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3156 for(y=1; y<b_w; y++)
3157 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3159 if(mb_y==b_height-1){
3160 for(x=0; x<b_w*2; x++)
3161 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3162 for(y=b_w; y<b_w*2-1; y++)
3163 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3167 //skip stuff outside the picture
3168 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3170 uint8_t *src= s-> input_picture.data[0];
3171 uint8_t *dst= s->current_picture.data[0];
3172 const int stride= s->current_picture.linesize[0];
3173 const int block_w= MB_SIZE >> s->block_max_depth;
3174 const int sx= block_w*mb_x - block_w/2;
3175 const int sy= block_w*mb_y - block_w/2;
3176 const int w= s->plane[0].width;
3177 const int h= s->plane[0].height;
3181 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3182 for(y=h; y<sy+block_w*2; y++)
3183 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3185 for(y=sy; y<sy+block_w*2; y++)
3186 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3188 if(sx+block_w*2 > w){
3189 for(y=sy; y<sy+block_w*2; y++)
3190 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3194 // intra(black) = neighbors' contribution to the current block
3196 color[i]= get_dc(s, mb_x, mb_y, i);
3198 // get previous score (cannot be cached due to OBMC)
3199 if(pass > 0 && (block->type&BLOCK_INTRA)){
3200 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3201 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3203 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3207 for(ref=0; ref < s->ref_frames; ref++){
3208 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3209 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3214 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3215 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3217 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3219 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3221 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3223 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3226 //FIXME avoid subpel interpol / round to nearest integer
3229 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3231 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3232 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3234 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3240 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3243 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3245 //FIXME or try the standard 2 pass qpel or similar
3247 mvr[0][0]= block->mx;
3248 mvr[0][1]= block->my;
3249 if(ref_rd > best_rd){
3257 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3258 //FIXME RD style color selection
3260 if(!same_block(block, &backup)){
3261 if(tb ) tb ->type &= ~BLOCK_OPT;
3262 if(lb ) lb ->type &= ~BLOCK_OPT;
3263 if(rb ) rb ->type &= ~BLOCK_OPT;
3264 if(bb ) bb ->type &= ~BLOCK_OPT;
3265 if(tlb) tlb->type &= ~BLOCK_OPT;
3266 if(trb) trb->type &= ~BLOCK_OPT;
3267 if(blb) blb->type &= ~BLOCK_OPT;
3268 if(brb) brb->type &= ~BLOCK_OPT;
3273 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3278 if(s->block_max_depth == 1){
3280 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3281 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3283 int best_rd, init_rd;
3284 const int index= mb_x + mb_y * b_stride;
3287 b[0]= &s->block[index];
3289 b[2]= b[0]+b_stride;
3291 if(same_block(b[0], b[1]) &&
3292 same_block(b[0], b[2]) &&
3293 same_block(b[0], b[3]))
3296 if(!s->me_cache_generation)
3297 memset(s->me_cache, 0, sizeof(s->me_cache));
3298 s->me_cache_generation += 1<<22;
3300 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3302 //FIXME more multiref search?
3303 check_4block_inter(s, mb_x, mb_y,
3304 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3305 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3308 if(!(b[i]->type&BLOCK_INTRA))
3309 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3311 if(init_rd != best_rd)
3315 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3319 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3320 const int level= b->level;
3321 const int w= b->width;
3322 const int h= b->height;
3323 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3324 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3325 int x,y, thres1, thres2;
3328 if(s->qlog == LOSSLESS_QLOG){
3331 dst[x + y*stride]= src[x + y*stride];
3335 bias= bias ? 0 : (3*qmul)>>3;
3336 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3342 int i= src[x + y*stride];
3344 if((unsigned)(i+thres1) > thres2){
3347 i/= qmul; //FIXME optimize
3348 dst[x + y*stride]= i;
3352 i/= qmul; //FIXME optimize
3353 dst[x + y*stride]= -i;
3356 dst[x + y*stride]= 0;
3362 int i= src[x + y*stride];
3364 if((unsigned)(i+thres1) > thres2){
3367 i= (i + bias) / qmul; //FIXME optimize
3368 dst[x + y*stride]= i;
3372 i= (i + bias) / qmul; //FIXME optimize
3373 dst[x + y*stride]= -i;
3376 dst[x + y*stride]= 0;
3380 if(level+1 == s->spatial_decomposition_count){
3381 // STOP_TIMER("quantize")
3385 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3386 const int w= b->width;
3387 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3388 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3389 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3393 if(s->qlog == LOSSLESS_QLOG) return;
3395 for(y=start_y; y<end_y; y++){
3396 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3397 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3401 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3403 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3407 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3408 STOP_TIMER("dquant")
3412 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3413 const int w= b->width;
3414 const int h= b->height;
3415 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3416 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3417 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3421 if(s->qlog == LOSSLESS_QLOG) return;
3425 int i= src[x + y*stride];
3427 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3429 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3433 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3434 STOP_TIMER("dquant")
3438 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3439 const int w= b->width;
3440 const int h= b->height;
3443 for(y=h-1; y>=0; y--){
3444 for(x=w-1; x>=0; x--){
3445 int i= x + y*stride;
3449 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3450 else src[i] -= src[i - 1];
3452 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3453 else src[i] -= src[i - 1];
3456 if(y) src[i] -= src[i - stride];
3462 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3463 const int w= b->width;
3468 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3472 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3474 for(y=start_y; y<end_y; y++){
3476 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3477 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3481 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3482 else line[x] += line[x - 1];
3484 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3485 else line[x] += line[x - 1];
3488 if(y) line[x] += prev[x];
3493 // STOP_TIMER("correlate")
3496 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3497 const int w= b->width;
3498 const int h= b->height;
3503 int i= x + y*stride;
3507 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3508 else src[i] += src[i - 1];
3510 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3511 else src[i] += src[i - 1];
3514 if(y) src[i] += src[i - stride];
3520 static void encode_qlogs(SnowContext *s){
3521 int plane_index, level, orientation;
3523 for(plane_index=0; plane_index<2; plane_index++){
3524 for(level=0; level<s->spatial_decomposition_count; level++){
3525 for(orientation=level ? 1:0; orientation<4; orientation++){
3526 if(orientation==2) continue;
3527 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3533 static void encode_header(SnowContext *s){
3537 memset(kstate, MID_STATE, sizeof(kstate));
3539 put_rac(&s->c, kstate, s->keyframe);
3540 if(s->keyframe || s->always_reset){
3542 s->last_spatial_decomposition_type=
3546 s->last_block_max_depth= 0;
3547 for(plane_index=0; plane_index<2; plane_index++){
3548 Plane *p= &s->plane[plane_index];
3551 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3555 put_symbol(&s->c, s->header_state, s->version, 0);
3556 put_rac(&s->c, s->header_state, s->always_reset);
3557 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3558 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3559 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3560 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3561 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3562 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3563 put_rac(&s->c, s->header_state, s->spatial_scalability);
3564 // put_rac(&s->c, s->header_state, s->rate_scalability);
3565 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3572 for(plane_index=0; plane_index<2; plane_index++){
3573 Plane *p= &s->plane[plane_index];
3574 update_mc |= p->last_htaps != p->htaps;
3575 update_mc |= p->last_diag_mc != p->diag_mc;
3576 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3578 put_rac(&s->c, s->header_state, update_mc);
3580 for(plane_index=0; plane_index<2; plane_index++){
3581 Plane *p= &s->plane[plane_index];
3582 put_rac(&s->c, s->header_state, p->diag_mc);
3583 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3584 for(i= p->htaps/2; i; i--)
3585 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3588 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3589 put_rac(&s->c, s->header_state, 1);
3590 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3593 put_rac(&s->c, s->header_state, 0);
3596 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3597 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3598 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3599 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3600 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3604 static void update_last_header_values(SnowContext *s){
3608 for(plane_index=0; plane_index<2; plane_index++){
3609 Plane *p= &s->plane[plane_index];
3610 p->last_diag_mc= p->diag_mc;
3611 p->last_htaps = p->htaps;
3612 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3616 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3617 s->last_qlog = s->qlog;
3618 s->last_qbias = s->qbias;
3619 s->last_mv_scale = s->mv_scale;
3620 s->last_block_max_depth = s->block_max_depth;
3621 s->last_spatial_decomposition_count= s->spatial_decomposition_count;
3624 static void decode_qlogs(SnowContext *s){
3625 int plane_index, level, orientation;
3627 for(plane_index=0; plane_index<3; plane_index++){
3628 for(level=0; level<s->spatial_decomposition_count; level++){
3629 for(orientation=level ? 1:0; orientation<4; orientation++){
3631 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3632 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3633 else q= get_symbol(&s->c, s->header_state, 1);
3634 s->plane[plane_index].band[level][orientation].qlog= q;
3640 static int decode_header(SnowContext *s){
3644 memset(kstate, MID_STATE, sizeof(kstate));
3646 s->keyframe= get_rac(&s->c, kstate);
3647 if(s->keyframe || s->always_reset){
3649 s->spatial_decomposition_type=
3653 s->block_max_depth= 0;
3656 s->version= get_symbol(&s->c, s->header_state, 0);
3658 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3661 s->always_reset= get_rac(&s->c, s->header_state);
3662 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3663 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3664 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3665 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3666 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3667 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3668 s->spatial_scalability= get_rac(&s->c, s->header_state);
3669 // s->rate_scalability= get_rac(&s->c, s->header_state);
3670 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3676 if(get_rac(&s->c, s->header_state)){
3677 for(plane_index=0; plane_index<2; plane_index++){
3678 int htaps, i, sum=0;
3679 Plane *p= &s->plane[plane_index];
3680 p->diag_mc= get_rac(&s->c, s->header_state);
3681 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3682 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3685 for(i= htaps/2; i; i--){
3686 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3687 sum += p->hcoeff[i];
3689 p->hcoeff[0]= 32-sum;
3691 s->plane[2].diag_mc= s->plane[1].diag_mc;
3692 s->plane[2].htaps = s->plane[1].htaps;
3693 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3695 if(get_rac(&s->c, s->header_state)){
3696 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3701 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3702 if(s->spatial_decomposition_type > 1){
3703 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3707 s->qlog += get_symbol(&s->c, s->header_state, 1);
3708 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3709 s->qbias += get_symbol(&s->c, s->header_state, 1);
3710 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3711 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3712 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3713 s->block_max_depth= 0;
3720 static void init_qexp(void){
3724 for(i=0; i<QROOT; i++){
3726 v *= pow(2, 1.0 / QROOT);
3730 static int common_init(AVCodecContext *avctx){
3731 SnowContext *s = avctx->priv_data;
3737 dsputil_init(&s->dsp, avctx);
3740 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3741 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3742 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3743 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3744 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3745 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3764 #define mcfh(dx,dy)\
3765 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3766 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3767 mc_block_hpel ## dx ## dy ## 16;\
3768 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3769 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3770 mc_block_hpel ## dx ## dy ## 8;
3780 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3782 width= s->avctx->width;
3783 height= s->avctx->height;
3785 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3786 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
3788 for(i=0; i<MAX_REF_FRAMES; i++)
3789 for(j=0; j<MAX_REF_FRAMES; j++)
3790 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3792 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3797 static int common_init_after_header(AVCodecContext *avctx){
3798 SnowContext *s = avctx->priv_data;
3799 int plane_index, level, orientation;
3801 for(plane_index=0; plane_index<3; plane_index++){
3802 int w= s->avctx->width;
3803 int h= s->avctx->height;
3806 w>>= s->chroma_h_shift;
3807 h>>= s->chroma_v_shift;
3809 s->plane[plane_index].width = w;
3810 s->plane[plane_index].height= h;
3812 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3813 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3814 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3815 SubBand *b= &s->plane[plane_index].band[level][orientation];
3817 b->buf= s->spatial_dwt_buffer;
3819 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3820 b->width = (w + !(orientation&1))>>1;
3821 b->height= (h + !(orientation>1))>>1;
3823 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3824 b->buf_x_offset = 0;
3825 b->buf_y_offset = 0;
3829 b->buf_x_offset = (w+1)>>1;
3832 b->buf += b->stride>>1;
3833 b->buf_y_offset = b->stride_line >> 1;
3835 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3838 b->parent= &s->plane[plane_index].band[level-1][orientation];
3839 //FIXME avoid this realloc
3840 av_freep(&b->x_coeff);
3841 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3851 static int qscale2qlog(int qscale){
3852 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3853 + 61*QROOT/8; //<64 >60
3856 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3858 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3859 * FIXME we know exact mv bits at this point,
3860 * but ratecontrol isn't set up to include them. */
3861 uint32_t coef_sum= 0;
3862 int level, orientation, delta_qlog;
3864 for(level=0; level<s->spatial_decomposition_count; level++){
3865 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3866 SubBand *b= &s->plane[0].band[level][orientation];
3867 IDWTELEM *buf= b->ibuf;
3868 const int w= b->width;
3869 const int h= b->height;
3870 const int stride= b->stride;
3871 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3872 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3873 const int qdiv= (1<<16)/qmul;
3875 //FIXME this is ugly
3878 buf[x+y*stride]= b->buf[x+y*stride];
3880 decorrelate(s, b, buf, stride, 1, 0);
3883 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3887 /* ugly, ratecontrol just takes a sqrt again */
3888 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3889 assert(coef_sum < INT_MAX);
3891 if(pict->pict_type == I_TYPE){
3892 s->m.current_picture.mb_var_sum= coef_sum;
3893 s->m.current_picture.mc_mb_var_sum= 0;
3895 s->m.current_picture.mc_mb_var_sum= coef_sum;
3896 s->m.current_picture.mb_var_sum= 0;
3899 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3900 if (pict->quality < 0)
3902 s->lambda= pict->quality * 3/2;
3903 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3904 s->qlog+= delta_qlog;
3908 static void calculate_visual_weight(SnowContext *s, Plane *p){
3909 int width = p->width;
3910 int height= p->height;
3911 int level, orientation, x, y;
3913 for(level=0; level<s->spatial_decomposition_count; level++){
3914 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3915 SubBand *b= &p->band[level][orientation];
3916 IDWTELEM *ibuf= b->ibuf;
3919 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3920 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3921 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3922 for(y=0; y<height; y++){
3923 for(x=0; x<width; x++){
3924 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3929 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3930 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3940 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3941 SubBand *b= &p->band[level][orientation];
3945 int step= 1 << (s->spatial_decomposition_count - level);
3952 //FIXME bias for non zero ?
3954 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3955 for(y=0; y<p->height; y++){
3956 for(x=0; x<p->width; x++){
3957 int sx= (x-xo + step/2) / step / Q2_STEP;
3958 int sy= (y-yo + step/2) / step / Q2_STEP;
3959 int v= r0[x + y*p->width] - r1[x + y*p->width];
3960 assert(sx>=0 && sy>=0 && sx < score_stride);
3962 score[sx + sy*score_stride] += v*v;
3963 assert(score[sx + sy*score_stride] >= 0);
3968 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3969 int level, orientation;
3971 for(level=0; level<s->spatial_decomposition_count; level++){
3972 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3973 SubBand *b= &p->band[level][orientation];
3974 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3976 dequantize(s, b, dst, b->stride);
3981 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3982 int level, orientation, ys, xs, x, y, pass;
3983 IDWTELEM best_dequant[height * stride];
3984 IDWTELEM idwt2_buffer[height * stride];
3985 const int score_stride= (width + 10)/Q2_STEP;
3986 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3987 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3988 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3990 //FIXME pass the copy cleanly ?
3992 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3993 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3995 for(level=0; level<s->spatial_decomposition_count; level++){
3996 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3997 SubBand *b= &p->band[level][orientation];
3998 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3999 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
4000 assert(src == b->buf); // code doesnt depen on this but its true currently
4002 quantize(s, b, dst, src, b->stride, s->qbias);
4005 for(pass=0; pass<1; pass++){
4006 if(s->qbias == 0) //keyframe
4008 for(level=0; level<s->spatial_decomposition_count; level++){
4009 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4010 SubBand *b= &p->band[level][orientation];
4011 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
4012 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
4014 for(ys= 0; ys<Q2_STEP; ys++){
4015 for(xs= 0; xs<Q2_STEP; xs++){
4016 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
4017 dequantize_all(s, p, idwt2_buffer, width, height);
4018 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
4019 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
4020 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
4021 for(y=ys; y<b->height; y+= Q2_STEP){
4022 for(x=xs; x<b->width; x+= Q2_STEP){
4023 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
4024 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
4025 //FIXME try more then just --
4028 dequantize_all(s, p, idwt2_buffer, width, height);
4029 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
4030 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
4031 for(y=ys; y<b->height; y+= Q2_STEP){
4032 for(x=xs; x<b->width; x+= Q2_STEP){
4033 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
4034 if(score[score_idx] <= best_score[score_idx] + threshold){
4035 best_score[score_idx]= score[score_idx];
4036 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
4037 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
4038 //FIXME copy instead
4047 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly insteda of copy at the end
4050 #endif /* QUANTIZE2==1 */
4052 static int encode_init(AVCodecContext *avctx)
4054 SnowContext *s = avctx->priv_data;
4057 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
4058 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
4059 "use vstrict=-2 / -strict -2 to use it anyway\n");
4063 if(avctx->prediction_method == DWT_97
4064 && (avctx->flags & CODEC_FLAG_QSCALE)
4065 && avctx->global_quality == 0){
4066 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
4070 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
4072 s->chroma_h_shift= 1; //FIXME XXX
4073 s->chroma_v_shift= 1;
4075 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
4076 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
4078 for(plane_index=0; plane_index<3; plane_index++){
4079 s->plane[plane_index].diag_mc= 1;
4080 s->plane[plane_index].htaps= 6;
4081 s->plane[plane_index].hcoeff[0]= 40;
4082 s->plane[plane_index].hcoeff[1]= -10;
4083 s->plane[plane_index].hcoeff[2]= 2;
4084 s->plane[plane_index].fast_mc= 1;
4093 s->m.flags = avctx->flags;
4094 s->m.bit_rate= avctx->bit_rate;
4096 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4097 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4098 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4099 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4100 h263_encode_init(&s->m); //mv_penalty
4102 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4104 if(avctx->flags&CODEC_FLAG_PASS1){
4105 if(!avctx->stats_out)
4106 avctx->stats_out = av_mallocz(256);
4108 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4109 if(ff_rate_control_init(&s->m) < 0)
4112 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4114 avctx->coded_frame= &s->current_picture;
4115 switch(avctx->pix_fmt){
4116 // case PIX_FMT_YUV444P:
4117 // case PIX_FMT_YUV422P:
4118 case PIX_FMT_YUV420P:
4120 // case PIX_FMT_YUV411P:
4121 // case PIX_FMT_YUV410P:
4122 s->colorspace_type= 0;
4124 /* case PIX_FMT_RGB32:
4128 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4131 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4132 s->chroma_h_shift= 1;
4133 s->chroma_v_shift= 1;
4135 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4136 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4138 s->avctx->get_buffer(s->avctx, &s->input_picture);
4140 if(s->avctx->me_method == ME_ITER){
4142 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4143 for(i=0; i<s->max_ref_frames; i++){
4144 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4145 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4152 #define USE_HALFPEL_PLANE 0
4154 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4157 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4161 int w= s->avctx->width >>is_chroma;
4162 int h= s->avctx->height >>is_chroma;
4163 int ls= frame->linesize[p];
4164 uint8_t *src= frame->data[p];
4166 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4167 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4168 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4175 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4182 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4190 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4198 static int frame_start(SnowContext *s){
4200 int w= s->avctx->width; //FIXME round up to x16 ?
4201 int h= s->avctx->height;
4203 if(s->current_picture.data[0]){
4204 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4205 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4206 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4209 tmp= s->last_picture[s->max_ref_frames-1];
4210 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4211 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4212 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4213 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4214 s->last_picture[0]= s->current_picture;
4215 s->current_picture= tmp;
4221 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4222 if(i && s->last_picture[i-1].key_frame)
4227 s->current_picture.reference= 1;
4228 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4229 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4233 s->current_picture.key_frame= s->keyframe;
4238 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4239 SnowContext *s = avctx->priv_data;
4240 RangeCoder * const c= &s->c;
4241 AVFrame *pict = data;
4242 const int width= s->avctx->width;
4243 const int height= s->avctx->height;
4244 int level, orientation, plane_index, i, y;
4245 uint8_t rc_header_bak[sizeof(s->header_state)];
4246 uint8_t rc_block_bak[sizeof(s->block_state)];
4248 ff_init_range_encoder(c, buf, buf_size);
4249 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4253 for(y=0; y<(height>>shift); y++)
4254 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4255 &pict->data[i][y * pict->linesize[i]],
4258 s->new_picture = *pict;
4260 s->m.picture_number= avctx->frame_number;
4261 if(avctx->flags&CODEC_FLAG_PASS2){
4263 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4264 s->keyframe= pict->pict_type==FF_I_TYPE;
4265 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4266 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4267 if (pict->quality < 0)
4271 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4273 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4276 if(s->pass1_rc && avctx->frame_number == 0)
4277 pict->quality= 2*FF_QP2LAMBDA;
4279 s->qlog= qscale2qlog(pict->quality);
4280 s->lambda = pict->quality * 3/2;
4282 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4283 s->qlog= LOSSLESS_QLOG;
4285 }//else keep previous frame's qlog until after motion est
4289 s->m.current_picture_ptr= &s->m.current_picture;
4290 if(pict->pict_type == P_TYPE){
4291 int block_width = (width +15)>>4;
4292 int block_height= (height+15)>>4;
4293 int stride= s->current_picture.linesize[0];
4295 assert(s->current_picture.data[0]);
4296 assert(s->last_picture[0].data[0]);
4298 s->m.avctx= s->avctx;
4299 s->m.current_picture.data[0]= s->current_picture.data[0];
4300 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4301 s->m. new_picture.data[0]= s-> input_picture.data[0];
4302 s->m. last_picture_ptr= &s->m. last_picture;
4304 s->m. last_picture.linesize[0]=
4305 s->m. new_picture.linesize[0]=
4306 s->m.current_picture.linesize[0]= stride;
4307 s->m.uvlinesize= s->current_picture.linesize[1];
4309 s->m.height= height;
4310 s->m.mb_width = block_width;
4311 s->m.mb_height= block_height;
4312 s->m.mb_stride= s->m.mb_width+1;
4313 s->m.b8_stride= 2*s->m.mb_width+1;
4315 s->m.pict_type= pict->pict_type;
4316 s->m.me_method= s->avctx->me_method;
4317 s->m.me.scene_change_score=0;
4318 s->m.flags= s->avctx->flags;
4319 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4320 s->m.out_format= FMT_H263;
4321 s->m.unrestricted_mv= 1;
4323 s->m.lambda = s->lambda;
4324 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4325 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4327 s->m.dsp= s->dsp; //move
4333 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4334 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4339 if(pict->pict_type == I_TYPE)
4340 s->spatial_decomposition_count= 5;
4342 s->spatial_decomposition_count= 5;
4344 s->m.pict_type = pict->pict_type;
4345 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4347 common_init_after_header(avctx);
4349 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4350 for(plane_index=0; plane_index<3; plane_index++){
4351 calculate_visual_weight(s, &s->plane[plane_index]);
4356 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4357 encode_blocks(s, 1);
4358 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4360 for(plane_index=0; plane_index<3; plane_index++){
4361 Plane *p= &s->plane[plane_index];
4365 // int bits= put_bits_count(&s->c.pb);
4367 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4369 if(pict->data[plane_index]) //FIXME gray hack
4372 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4375 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4378 && pict->pict_type == P_TYPE
4379 && !(avctx->flags&CODEC_FLAG_PASS2)
4380 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4381 ff_init_range_encoder(c, buf, buf_size);
4382 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4383 pict->pict_type= FF_I_TYPE;
4385 s->current_picture.key_frame=1;
4389 if(s->qlog == LOSSLESS_QLOG){
4392 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4398 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4404 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4406 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4408 if(s->pass1_rc && plane_index==0){
4409 int delta_qlog = ratecontrol_1pass(s, pict);
4410 if (delta_qlog <= INT_MIN)
4413 //reordering qlog in the bitstream would eliminate this reset
4414 ff_init_range_encoder(c, buf, buf_size);
4415 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4416 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4418 encode_blocks(s, 0);
4422 for(level=0; level<s->spatial_decomposition_count; level++){
4423 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4424 SubBand *b= &p->band[level][orientation];
4427 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4429 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4430 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4431 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4433 correlate(s, b, b->ibuf, b->stride, 1, 0);
4436 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4438 for(level=0; level<s->spatial_decomposition_count; level++){
4439 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4440 SubBand *b= &p->band[level][orientation];
4442 dequantize(s, b, b->ibuf, b->stride);
4446 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4447 if(s->qlog == LOSSLESS_QLOG){
4450 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4455 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4456 STOP_TIMER("pred-conv")}
4459 if(pict->pict_type == I_TYPE){
4462 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4463 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4467 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4468 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4471 if(s->avctx->flags&CODEC_FLAG_PSNR){
4474 if(pict->data[plane_index]) //FIXME gray hack
4477 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4481 s->avctx->error[plane_index] += error;
4482 s->current_picture.error[plane_index] = error;
4486 update_last_header_values(s);
4488 if(s->last_picture[s->max_ref_frames-1].data[0]){
4489 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4491 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4492 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4495 s->current_picture.coded_picture_number = avctx->frame_number;
4496 s->current_picture.pict_type = pict->pict_type;
4497 s->current_picture.quality = pict->quality;
4498 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4499 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4500 s->m.current_picture.display_picture_number =
4501 s->m.current_picture.coded_picture_number = avctx->frame_number;
4502 s->m.current_picture.quality = pict->quality;
4503 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4505 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4507 if(avctx->flags&CODEC_FLAG_PASS1)
4508 ff_write_pass1_stats(&s->m);
4509 s->m.last_pict_type = s->m.pict_type;
4510 avctx->frame_bits = s->m.frame_bits;
4511 avctx->mv_bits = s->m.mv_bits;
4512 avctx->misc_bits = s->m.misc_bits;
4513 avctx->p_tex_bits = s->m.p_tex_bits;
4517 return ff_rac_terminate(c);
4520 static void common_end(SnowContext *s){
4521 int plane_index, level, orientation, i;
4523 av_freep(&s->spatial_dwt_buffer);
4524 av_freep(&s->spatial_idwt_buffer);
4526 av_freep(&s->m.me.scratchpad);
4527 av_freep(&s->m.me.map);
4528 av_freep(&s->m.me.score_map);
4529 av_freep(&s->m.obmc_scratchpad);
4531 av_freep(&s->block);
4533 for(i=0; i<MAX_REF_FRAMES; i++){
4534 av_freep(&s->ref_mvs[i]);
4535 av_freep(&s->ref_scores[i]);
4536 if(s->last_picture[i].data[0])
4537 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4540 for(plane_index=0; plane_index<3; plane_index++){
4541 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4542 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4543 SubBand *b= &s->plane[plane_index].band[level][orientation];
4545 av_freep(&b->x_coeff);
4551 static int encode_end(AVCodecContext *avctx)
4553 SnowContext *s = avctx->priv_data;
4556 av_free(avctx->stats_out);
4561 static int decode_init(AVCodecContext *avctx)
4563 avctx->pix_fmt= PIX_FMT_YUV420P;
4570 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4571 SnowContext *s = avctx->priv_data;
4572 RangeCoder * const c= &s->c;
4574 AVFrame *picture = data;
4575 int level, orientation, plane_index, i;
4577 ff_init_range_decoder(c, buf, buf_size);
4578 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4580 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4581 if(decode_header(s)<0)
4583 common_init_after_header(avctx);
4585 // realloc slice buffer for the case that spatial_decomposition_count changed
4586 slice_buffer_destroy(&s->sb);
4587 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4589 for(plane_index=0; plane_index<3; plane_index++){
4590 Plane *p= &s->plane[plane_index];
4591 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4592 && p->hcoeff[1]==-10
4596 if(!s->block) alloc_blocks(s);
4599 //keyframe flag dupliaction mess FIXME
4600 if(avctx->debug&FF_DEBUG_PICT_INFO)
4601 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4605 for(plane_index=0; plane_index<3; plane_index++){
4606 Plane *p= &s->plane[plane_index];
4610 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4612 if(s->avctx->debug&2048){
4613 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4614 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4618 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4619 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4625 for(level=0; level<s->spatial_decomposition_count; level++){
4626 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4627 SubBand *b= &p->band[level][orientation];
4628 unpack_coeffs(s, b, b->parent, orientation);
4631 STOP_TIMER("unpack coeffs");
4635 const int mb_h= s->b_height << s->block_max_depth;
4636 const int block_size = MB_SIZE >> s->block_max_depth;
4637 const int block_w = plane_index ? block_size/2 : block_size;
4639 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4644 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4645 for(mb_y=0; mb_y<=mb_h; mb_y++){
4647 int slice_starty = block_w*mb_y;
4648 int slice_h = block_w*(mb_y+1);
4649 if (!(s->keyframe || s->avctx->debug&512)){
4650 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4651 slice_h -= (block_w >> 1);
4656 for(level=0; level<s->spatial_decomposition_count; level++){
4657 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4658 SubBand *b= &p->band[level][orientation];
4661 int our_mb_start = mb_y;
4662 int our_mb_end = (mb_y + 1);
4664 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4665 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4666 if (!(s->keyframe || s->avctx->debug&512)){
4667 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4668 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4670 start_y = FFMIN(b->height, start_y);
4671 end_y = FFMIN(b->height, end_y);
4673 if (start_y != end_y){
4674 if (orientation == 0){
4675 SubBand * correlate_band = &p->band[0][0];
4676 int correlate_end_y = FFMIN(b->height, end_y + 1);
4677 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4678 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4679 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4680 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4683 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4687 STOP_TIMER("decode_subband_slice");
4691 for(; yd<slice_h; yd+=4){
4692 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4694 STOP_TIMER("idwt slice");}
4697 if(s->qlog == LOSSLESS_QLOG){
4698 for(; yq<slice_h && yq<h; yq++){
4699 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4701 line[x] <<= FRAC_BITS;
4706 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4708 y = FFMIN(p->height, slice_starty);
4709 end_y = FFMIN(p->height, slice_h);
4711 slice_buffer_release(&s->sb, y++);
4714 slice_buffer_flush(&s->sb);
4716 STOP_TIMER("idwt + predict_slices")}
4721 if(s->last_picture[s->max_ref_frames-1].data[0]){
4722 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4724 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4725 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4728 if(!(s->avctx->debug&2048))
4729 *picture= s->current_picture;
4731 *picture= s->mconly_picture;
4733 *data_size = sizeof(AVFrame);
4735 bytes_read= c->bytestream - c->bytestream_start;
4736 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4741 static int decode_end(AVCodecContext *avctx)
4743 SnowContext *s = avctx->priv_data;
4745 slice_buffer_destroy(&s->sb);
4752 AVCodec snow_decoder = {
4756 sizeof(SnowContext),
4761 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4765 #ifdef CONFIG_SNOW_ENCODER
4766 AVCodec snow_encoder = {
4770 sizeof(SnowContext),
4787 int buffer[2][width*height];
4790 s.spatial_decomposition_count=6;
4791 s.spatial_decomposition_type=1;
4793 printf("testing 5/3 DWT\n");
4794 for(i=0; i<width*height; i++)
4795 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4797 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4798 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4800 for(i=0; i<width*height; i++)
4801 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4803 printf("testing 9/7 DWT\n");
4804 s.spatial_decomposition_type=0;
4805 for(i=0; i<width*height; i++)
4806 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4808 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4809 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4811 for(i=0; i<width*height; i++)
4812 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4815 printf("testing AC coder\n");
4816 memset(s.header_state, 0, sizeof(s.header_state));
4817 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4818 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4820 for(i=-256; i<256; i++){
4822 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4823 STOP_TIMER("put_symbol")
4825 ff_rac_terminate(&s.c);
4827 memset(s.header_state, 0, sizeof(s.header_state));
4828 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4829 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4831 for(i=-256; i<256; i++){
4834 j= get_symbol(&s.c, s.header_state, 1);
4835 STOP_TIMER("get_symbol")
4836 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4840 int level, orientation, x, y;
4841 int64_t errors[8][4];
4844 memset(errors, 0, sizeof(errors));
4845 s.spatial_decomposition_count=3;
4846 s.spatial_decomposition_type=0;
4847 for(level=0; level<s.spatial_decomposition_count; level++){
4848 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4849 int w= width >> (s.spatial_decomposition_count-level);
4850 int h= height >> (s.spatial_decomposition_count-level);
4851 int stride= width << (s.spatial_decomposition_count-level);
4852 DWTELEM *buf= buffer[0];
4855 if(orientation&1) buf+=w;
4856 if(orientation>1) buf+=stride>>1;
4858 memset(buffer[0], 0, sizeof(int)*width*height);
4859 buf[w/2 + h/2*stride]= 256*256;
4860 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4861 for(y=0; y<height; y++){
4862 for(x=0; x<width; x++){
4863 int64_t d= buffer[0][x + y*width];
4865 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4867 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4869 error= (int)(sqrt(error)+0.5);
4870 errors[level][orientation]= error;
4871 if(g) g=ff_gcd(g, error);
4875 printf("static int const visual_weight[][4]={\n");
4876 for(level=0; level<s.spatial_decomposition_count; level++){
4878 for(orientation=0; orientation<4; orientation++){
4879 printf("%8"PRId64",", errors[level][orientation]/g);
4887 int w= width >> (s.spatial_decomposition_count-level);
4888 int h= height >> (s.spatial_decomposition_count-level);
4889 int stride= width << (s.spatial_decomposition_count-level);
4890 DWTELEM *buf= buffer[0];
4896 memset(buffer[0], 0, sizeof(int)*width*height);
4898 for(y=0; y<height; y++){
4899 for(x=0; x<width; x++){
4900 int tab[4]={0,2,3,1};
4901 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4904 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4908 buf[x + y*stride ]=169;
4909 buf[x + y*stride-w]=64;
4912 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4914 for(y=0; y<height; y++){
4915 for(x=0; x<width; x++){
4916 int64_t d= buffer[0][x + y*width];
4918 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4920 if(FFABS(height/2-y)<9) printf("\n");