2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
28 #include "mpegvideo.h"
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
359 static const uint8_t obmc4[16]={
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
400 typedef struct x_and_coeff{
405 typedef struct SubBand{
410 int qlog; ///< log(qscale)/log[2^(1/6)]
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
421 typedef struct Plane{
424 SubBand band[MAX_DECOMPOSITIONS][4];
427 int8_t hcoeff[HTAPS_MAX/2];
432 int8_t last_hcoeff[HTAPS_MAX/2];
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
439 AVCodecContext *avctx;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
469 int spatial_scalability;
479 #define QBIAS_SHIFT 3
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
507 static void iterative_me(SnowContext *s);
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
520 for(i = 0; i < max_allocated_lines; i++){
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
524 buf->data_stack_top = max_allocated_lines - 1;
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
532 assert(buf->data_stack_top >= 0);
533 // assert(!buf->line[line]);
535 return buf->line[line];
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
545 static void slice_buffer_release(slice_buffer * buf, int line)
550 assert(line >= 0 && line < buf->line_count);
551 assert(buf->line[line]);
553 offset = buf->line_width * line;
554 buffer = buf->line[line];
555 buf->data_stack_top++;
556 buf->data_stack[buf->data_stack_top] = buffer;
557 buf->line[line] = NULL;
560 static void slice_buffer_flush(slice_buffer * buf)
563 for(i = 0; i < buf->line_count; i++){
565 slice_buffer_release(buf, i);
569 static void slice_buffer_destroy(slice_buffer * buf)
572 slice_buffer_flush(buf);
574 for(i = buf->data_count - 1; i >= 0; i--){
575 av_freep(&buf->data_stack[i]);
577 av_freep(&buf->data_stack);
578 av_freep(&buf->line);
582 // Avoid a name clash on SGI IRIX
585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
586 static uint8_t qexp[QROOT];
588 static inline int mirror(int v, int m){
589 while((unsigned)v > (unsigned)m){
596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
600 const int a= FFABS(v);
601 const int e= av_log2(a);
603 const int el= FFMIN(e, 10);
604 put_rac(c, state+0, 0);
607 put_rac(c, state+1+i, 1); //1..10
610 put_rac(c, state+1+9, 1); //1..10
612 put_rac(c, state+1+FFMIN(i,9), 0);
614 for(i=e-1; i>=el; i--){
615 put_rac(c, state+22+9, (a>>i)&1); //22..31
618 put_rac(c, state+22+i, (a>>i)&1); //22..31
622 put_rac(c, state+11 + el, v < 0); //11..21
625 put_rac(c, state+0, 0);
628 put_rac(c, state+1+i, 1); //1..10
630 put_rac(c, state+1+i, 0);
632 for(i=e-1; i>=0; i--){
633 put_rac(c, state+22+i, (a>>i)&1); //22..31
637 put_rac(c, state+11 + e, v < 0); //11..21
640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
642 put_rac(c, state+1+FFMIN(i,9), 0);
644 for(i=e-1; i>=0; i--){
645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
653 put_rac(c, state+0, 1);
657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658 if(get_rac(c, state+0))
663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
668 for(i=e-1; i>=0; i--){
669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
681 int r= log2>=0 ? 1<<log2 : 1;
687 put_rac(c, state+4+log2, 1);
692 put_rac(c, state+4+log2, 0);
694 for(i=log2-1; i>=0; i--){
695 put_rac(c, state+31-i, (v>>i)&1);
699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
701 int r= log2>=0 ? 1<<log2 : 1;
706 while(get_rac(c, state+4+log2)){
712 for(i=log2-1; i>=0; i--){
713 v+= get_rac(c, state+31-i)<<i;
719 static av_always_inline void
720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
721 int dst_step, int src_step, int ref_step,
722 int width, int mul, int add, int shift,
723 int highpass, int inverse){
724 const int mirror_left= !highpass;
725 const int mirror_right= (width&1) ^ highpass;
726 const int w= (width>>1) - 1 + (highpass & width);
729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
738 LIFT(src[i*src_step],
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
745 LIFT(src[w*src_step],
746 ((mul*2*ref[w*ref_step]+add)>>shift),
751 static av_always_inline void
752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
753 int dst_step, int src_step, int ref_step,
754 int width, int mul, int add, int shift,
755 int highpass, int inverse){
756 const int mirror_left= !highpass;
757 const int mirror_right= (width&1) ^ highpass;
758 const int w= (width>>1) - 1 + (highpass & width);
761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
770 LIFT(src[i*src_step],
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
777 LIFT(src[w*src_step],
778 ((mul*2*ref[w*ref_step]+add)>>shift),
784 static av_always_inline void
785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
786 int dst_step, int src_step, int ref_step,
787 int width, int mul, int add, int shift,
788 int highpass, int inverse){
789 const int mirror_left= !highpass;
790 const int mirror_right= (width&1) ^ highpass;
791 const int w= (width>>1) - 1 + (highpass & width);
795 #define LIFTS(src, ref, inv) \
797 (src) + (((ref) + 4*(src))>>shift): \
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
807 LIFTS(src[i*src_step],
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
817 static av_always_inline void
818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
819 int dst_step, int src_step, int ref_step,
820 int width, int mul, int add, int shift,
821 int highpass, int inverse){
822 const int mirror_left= !highpass;
823 const int mirror_right= (width&1) ^ highpass;
824 const int w= (width>>1) - 1 + (highpass & width);
828 #define LIFTS(src, ref, inv) \
830 (src) + (((ref) + 4*(src))>>shift): \
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
840 LIFTS(src[i*src_step],
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
852 static void horizontal_decompose53i(DWTELEM *b, int width){
854 const int width2= width>>1;
856 const int w2= (width+1)>>1;
858 for(x=0; x<width2; x++){
860 temp[x+w2]= b[2*x + 1];
874 for(x=1; x+1<width2; x+=2){
878 A2 += (A1 + A3 + 2)>>2;
882 A1= temp[x+1+width2];
885 A4 += (A1 + A3 + 2)>>2;
891 A2 += (A1 + A3 + 2)>>2;
896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
904 for(i=0; i<width; i++){
905 b1[i] -= (b0[i] + b2[i])>>1;
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
912 for(i=0; i<width; i++){
913 b1[i] += (b0[i] + b2[i] + 2)>>2;
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
922 for(y=-2; y<height; y+=2){
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
937 static void horizontal_decompose97i(DWTELEM *b, int width){
939 const int w2= (width+1)>>1;
941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
951 for(i=0; i<width; i++){
952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
959 for(i=0; i<width; i++){
960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
967 for(i=0; i<width; i++){
969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
979 for(i=0; i<width; i++){
980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
991 for(y=-4; y<height; y+=2){
992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1013 for(level=0; level<decomposition_count; level++){
1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1021 static void horizontal_compose53i(IDWTELEM *b, int width){
1022 IDWTELEM temp[width];
1023 const int width2= width>>1;
1024 const int w2= (width+1)>>1;
1036 for(x=1; x+1<width2; x+=2){
1040 A2 += (A1 + A3 + 2)>>2;
1044 A1= temp[x+1+width2];
1047 A4 += (A1 + A3 + 2)>>2;
1053 A2 += (A1 + A3 + 2)>>2;
1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1060 for(x=0; x<width2; x++){
1062 b[2*x + 1]= temp[x+w2];
1068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1071 for(i=0; i<width; i++){
1072 b1[i] += (b0[i] + b2[i])>>1;
1076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1079 for(i=0; i<width; i++){
1080 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1099 IDWTELEM *b0= cs->b0;
1100 IDWTELEM *b1= cs->b1;
1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1117 IDWTELEM *b0= cs->b0;
1118 IDWTELEM *b1= cs->b1;
1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1135 spatial_compose53i_init(&cs, buffer, height, stride);
1136 while(cs.y <= height)
1137 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142 IDWTELEM temp[width];
1143 const int w2= (width+1)>>1;
1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1154 for(i=0; i<width; i++){
1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1162 for(i=0; i<width; i++){
1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1170 for(i=0; i<width; i++){
1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1182 for(i=0; i<width; i++){
1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1190 for(i=0; i<width; i++){
1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1221 IDWTELEM *b0= cs->b0;
1222 IDWTELEM *b1= cs->b1;
1223 IDWTELEM *b2= cs->b2;
1224 IDWTELEM *b3= cs->b3;
1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1228 if(y>0 && y+4<height){
1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1249 IDWTELEM *b0= cs->b0;
1250 IDWTELEM *b1= cs->b1;
1251 IDWTELEM *b2= cs->b2;
1252 IDWTELEM *b3= cs->b3;
1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1273 spatial_compose97i_init(&cs, buffer, height, stride);
1274 while(cs.y <= height)
1275 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1280 for(level=decomposition_count-1; level>=0; level--){
1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1290 for(level=decomposition_count-1; level>=0; level--){
1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1299 const int support = type==1 ? 3 : 5;
1303 for(level=decomposition_count-1; level>=0; level--){
1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1316 const int support = type==1 ? 3 : 5;
1320 for(level=decomposition_count-1; level>=0; level--){
1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1333 DWTCompose cs[MAX_DECOMPOSITIONS];
1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336 for(y=0; y<height; y+=4)
1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1341 const int w= b->width;
1342 const int h= b->height;
1354 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355 v= src[x + y*stride];
1358 t= src[x + (y-1)*stride];
1360 lt= src[x - 1 + (y-1)*stride];
1363 rt= src[x + 1 + (y-1)*stride];
1367 l= src[x - 1 + y*stride];
1369 if(orientation==1) ll= src[y + (x-2)*stride];
1370 else ll= src[x - 2 + y*stride];
1376 if(px<b->parent->width && py<b->parent->height)
1377 p= parent[px + py*2*stride];
1379 if(!(/*ll|*/l|lt|t|rt|p)){
1381 runs[run_index++]= run;
1389 max_index= run_index;
1390 runs[run_index++]= run;
1392 run= runs[run_index++];
1394 put_symbol2(&s->c, b->state[30], max_index, 0);
1395 if(run_index <= max_index)
1396 put_symbol2(&s->c, b->state[1], run, 3);
1399 if(s->c.bytestream_end - s->c.bytestream < w*40){
1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1405 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1406 v= src[x + y*stride];
1409 t= src[x + (y-1)*stride];
1411 lt= src[x - 1 + (y-1)*stride];
1414 rt= src[x + 1 + (y-1)*stride];
1418 l= src[x - 1 + y*stride];
1420 if(orientation==1) ll= src[y + (x-2)*stride];
1421 else ll= src[x - 2 + y*stride];
1427 if(px<b->parent->width && py<b->parent->height)
1428 p= parent[px + py*2*stride];
1430 if(/*ll|*/l|lt|t|rt|p){
1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1433 put_rac(&s->c, &b->state[0][context], !!v);
1436 run= runs[run_index++];
1438 if(run_index <= max_index)
1439 put_symbol2(&s->c, b->state[1], run, 3);
1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448 int l2= 2*FFABS(l) + (l<0);
1449 int t2= 2*FFABS(t) + (t<0);
1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1461 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1462 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1463 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1464 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1468 const int w= b->width;
1469 const int h= b->height;
1474 x_and_coeff *xc= b->x_coeff;
1475 x_and_coeff *prev_xc= NULL;
1476 x_and_coeff *prev2_xc= xc;
1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478 x_and_coeff *prev_parent_xc= parent_xc;
1480 runs= get_symbol2(&s->c, b->state[30], 0);
1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1486 int lt=0, t=0, rt=0;
1488 if(y && prev_xc->x == 0){
1500 if(prev_xc->x == x + 1)
1506 if(x>>1 > parent_xc->x){
1509 if(x>>1 == parent_xc->x){
1510 p= parent_xc->coeff;
1513 if(/*ll|*/l|lt|t|rt|p){
1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1516 v=get_rac(&s->c, &b->state[0][context]);
1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1539 else max_run= FFMIN(run, w-x-1);
1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1547 (xc++)->x= w+1; //end marker
1553 while(parent_xc->x != parent->width+1)
1556 prev_parent_xc= parent_xc;
1558 parent_xc= prev_parent_xc;
1563 (xc++)->x= w+1; //end marker
1567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568 const int w= b->width;
1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1580 /* If we are on the second or later slice, restore our index. */
1582 new_index = save_state[0];
1585 for(y=start_y; y<h; y++){
1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589 memset(line, 0, b->width*sizeof(IDWTELEM));
1590 v = b->x_coeff[new_index].coeff;
1591 x = b->x_coeff[new_index++].x;
1593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594 register int u= -(v&1);
1595 line[x] = (t^u) - u;
1597 v = b->x_coeff[new_index].coeff;
1598 x = b->x_coeff[new_index++].x;
1602 /* Save our variables for the next slice. */
1603 save_state[0] = new_index;
1608 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1609 int plane_index, level, orientation;
1611 for(plane_index=0; plane_index<3; plane_index++){
1612 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1613 for(orientation=level ? 1:0; orientation<4; orientation++){
1614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1618 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1622 static int alloc_blocks(SnowContext *s){
1623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1635 uint8_t *bytestream= d->bytestream;
1636 uint8_t *bytestream_start= d->bytestream_start;
1638 d->bytestream= bytestream;
1639 d->bytestream_start= bytestream_start;
1642 //near copy & paste from dsputil, FIXME
1643 static int pix_sum(uint8_t * pix, int line_size, int w)
1648 for (i = 0; i < w; i++) {
1649 for (j = 0; j < w; j++) {
1653 pix += line_size - w;
1658 //near copy & paste from dsputil, FIXME
1659 static int pix_norm1(uint8_t * pix, int line_size, int w)
1662 uint32_t *sq = ff_squareTbl + 256;
1665 for (i = 0; i < w; i++) {
1666 for (j = 0; j < w; j ++) {
1670 pix += line_size - w;
1675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1676 const int w= s->b_width << s->block_max_depth;
1677 const int rem_depth= s->block_max_depth - level;
1678 const int index= (x + y*w) << rem_depth;
1679 const int block_w= 1<<rem_depth;
1692 for(j=0; j<block_w; j++){
1693 for(i=0; i<block_w; i++){
1694 s->block[index + i + j*w]= block;
1699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1700 const int offset[3]= {
1702 ((y*c->uvstride + x)>>1),
1703 ((y*c->uvstride + x)>>1),
1707 c->src[0][i]= src [i];
1708 c->ref[0][i]= ref [i] + offset[i];
1713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1715 if(s->ref_frames == 1){
1716 *mx = mid_pred(left->mx, top->mx, tr->mx);
1717 *my = mid_pred(left->my, top->my, tr->my);
1719 const int *scale = scale_mv_ref[ref];
1720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1721 (top ->mx * scale[top ->ref] + 128) >>8,
1722 (tr ->mx * scale[tr ->ref] + 128) >>8);
1723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1724 (top ->my * scale[top ->ref] + 128) >>8,
1725 (tr ->my * scale[tr ->ref] + 128) >>8);
1732 #define P_TOPRIGHT P[3]
1733 #define P_MEDIAN P[4]
1735 #define FLAG_QPEL 1 //must be 1
1737 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1738 uint8_t p_buffer[1024];
1739 uint8_t i_buffer[1024];
1740 uint8_t p_state[sizeof(s->block_state)];
1741 uint8_t i_state[sizeof(s->block_state)];
1743 uint8_t *pbbak= s->c.bytestream;
1744 uint8_t *pbbak_start= s->c.bytestream_start;
1745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1746 const int w= s->b_width << s->block_max_depth;
1747 const int h= s->b_height << s->block_max_depth;
1748 const int rem_depth= s->block_max_depth - level;
1749 const int index= (x + y*w) << rem_depth;
1750 const int block_w= 1<<(LOG2_MB_SIZE - level);
1751 int trx= (x+1)<<rem_depth;
1752 int try= (y+1)<<rem_depth;
1753 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1754 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1759 int pl = left->color[0];
1760 int pcb= left->color[1];
1761 int pcr= left->color[2];
1765 const int stride= s->current_picture.linesize[0];
1766 const int uvstride= s->current_picture.linesize[1];
1767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1771 int16_t last_mv[3][2];
1772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1773 const int shift= 1+qpel;
1774 MotionEstContext *c= &s->m.me;
1775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1776 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1777 int my_context= av_log2(2*FFABS(left->my - top->my));
1778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1779 int ref, best_ref, ref_score, ref_mx, ref_my;
1781 assert(sizeof(s->block_state) >= 256);
1783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1787 // clip predictors / edge ?
1789 P_LEFT[0]= left->mx;
1790 P_LEFT[1]= left->my;
1793 P_TOPRIGHT[0]= tr->mx;
1794 P_TOPRIGHT[1]= tr->my;
1796 last_mv[0][0]= s->block[index].mx;
1797 last_mv[0][1]= s->block[index].my;
1798 last_mv[1][0]= right->mx;
1799 last_mv[1][1]= right->my;
1800 last_mv[2][0]= bottom->mx;
1801 last_mv[2][1]= bottom->my;
1808 assert(c-> stride == stride);
1809 assert(c->uvstride == uvstride);
1811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1816 c->xmin = - x*block_w - 16+2;
1817 c->ymin = - y*block_w - 16+2;
1818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1833 c->pred_x= P_LEFT[0];
1834 c->pred_y= P_LEFT[1];
1836 c->pred_x = P_MEDIAN[0];
1837 c->pred_y = P_MEDIAN[1];
1842 for(ref=0; ref<s->ref_frames; ref++){
1843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1848 assert(ref_mx >= c->xmin);
1849 assert(ref_mx <= c->xmax);
1850 assert(ref_my >= c->ymin);
1851 assert(ref_my <= c->ymax);
1853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1856 if(s->ref_mvs[ref]){
1857 s->ref_mvs[ref][index][0]= ref_mx;
1858 s->ref_mvs[ref][index][1]= ref_my;
1859 s->ref_scores[ref][index]= ref_score;
1861 if(score > ref_score){
1868 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1873 pc.bytestream_start=
1874 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1875 memcpy(p_state, s->block_state, sizeof(s->block_state));
1877 if(level!=s->block_max_depth)
1878 put_rac(&pc, &p_state[4 + s_context], 1);
1879 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1880 if(s->ref_frames > 1)
1881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1885 p_len= pc.bytestream - pc.bytestream_start;
1886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1888 block_s= block_w*block_w;
1889 sum = pix_sum(current_data[0], stride, block_w);
1890 l= (sum + block_s/2)/block_s;
1891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1893 block_s= block_w*block_w>>2;
1894 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1895 cb= (sum + block_s/2)/block_s;
1896 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1897 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1898 cr= (sum + block_s/2)/block_s;
1899 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1902 ic.bytestream_start=
1903 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1904 memcpy(i_state, s->block_state, sizeof(s->block_state));
1905 if(level!=s->block_max_depth)
1906 put_rac(&ic, &i_state[4 + s_context], 1);
1907 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1908 put_symbol(&ic, &i_state[32], l-pl , 1);
1909 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1910 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1911 i_len= ic.bytestream - ic.bytestream_start;
1912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1914 // assert(score==256*256*256*64-1);
1915 assert(iscore < 255*255*256 + s->lambda2*10);
1916 assert(iscore >= 0);
1917 assert(l>=0 && l<=255);
1918 assert(pl>=0 && pl<=255);
1921 int varc= iscore >> 8;
1922 int vard= score >> 8;
1923 if (vard <= 64 || vard < varc)
1924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1926 c->scene_change_score+= s->m.qscale;
1929 if(level!=s->block_max_depth){
1930 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1935 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1937 if(score2 < score && score2 < iscore)
1942 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1943 memcpy(pbbak, i_buffer, i_len);
1945 s->c.bytestream_start= pbbak_start;
1946 s->c.bytestream= pbbak + i_len;
1947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1948 memcpy(s->block_state, i_state, sizeof(s->block_state));
1951 memcpy(pbbak, p_buffer, p_len);
1953 s->c.bytestream_start= pbbak_start;
1954 s->c.bytestream= pbbak + p_len;
1955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1956 memcpy(s->block_state, p_state, sizeof(s->block_state));
1961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1970 const int w= s->b_width << s->block_max_depth;
1971 const int rem_depth= s->block_max_depth - level;
1972 const int index= (x + y*w) << rem_depth;
1973 int trx= (x+1)<<rem_depth;
1974 BlockNode *b= &s->block[index];
1975 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1976 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1979 int pl = left->color[0];
1980 int pcb= left->color[1];
1981 int pcr= left->color[2];
1983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1993 if(level!=s->block_max_depth){
1994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1995 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1997 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2005 if(b->type & BLOCK_INTRA){
2006 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2015 if(s->ref_frames > 1)
2016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2023 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2024 const int w= s->b_width << s->block_max_depth;
2025 const int rem_depth= s->block_max_depth - level;
2026 const int index= (x + y*w) << rem_depth;
2027 int trx= (x+1)<<rem_depth;
2028 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2029 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2041 int l = left->color[0];
2042 int cb= left->color[1];
2043 int cr= left->color[2];
2045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2052 pred_mv(s, &mx, &my, 0, left, top, tr);
2053 l += get_symbol(&s->c, &s->block_state[32], 1);
2054 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2055 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2057 if(s->ref_frames > 1)
2058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2059 pred_mv(s, &mx, &my, ref, left, top, tr);
2060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2067 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2068 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2072 static void encode_blocks(SnowContext *s, int search){
2077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2086 if(s->avctx->me_method == ME_ITER || !search)
2087 encode_q_branch2(s, 0, x, y);
2089 encode_q_branch (s, 0, x, y);
2094 static void decode_blocks(SnowContext *s){
2101 decode_q_branch(s, 0, x, y);
2106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2107 static const uint8_t weight[64]={
2118 static const uint8_t brane[256]={
2119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2137 static const uint8_t needs[16]={
2145 int16_t tmpIt [64*(32+HTAPS_MAX)];
2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2147 int16_t *tmpI= tmpIt;
2148 uint8_t *tmp2= tmp2t[0];
2149 const uint8_t *hpel[11];
2150 assert(dx<16 && dy<16);
2151 r= brane[dx + 16*dy]&15;
2152 l= brane[dx + 16*dy]>>4;
2154 b= needs[l] | needs[r];
2155 if(p && !p->diag_mc)
2159 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2160 for(x=0; x < b_w; x++){
2161 int a_1=src[x + HTAPS_MAX/2-4];
2162 int a0= src[x + HTAPS_MAX/2-3];
2163 int a1= src[x + HTAPS_MAX/2-2];
2164 int a2= src[x + HTAPS_MAX/2-1];
2165 int a3= src[x + HTAPS_MAX/2+0];
2166 int a4= src[x + HTAPS_MAX/2+1];
2167 int a5= src[x + HTAPS_MAX/2+2];
2168 int a6= src[x + HTAPS_MAX/2+3];
2170 if(!p || p->fast_mc){
2171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2180 if(am&(~255)) am= ~(am>>31);
2189 src += HTAPS_MAX/2 - 1;
2193 for(y=0; y < b_h; y++){
2194 for(x=0; x < b_w+1; x++){
2195 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2196 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2197 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2198 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2199 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2200 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2201 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2202 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2204 if(!p || p->fast_mc)
2205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2209 if(am&(~255)) am= ~(am>>31);
2217 src += stride*(HTAPS_MAX/2 - 1);
2221 for(y=0; y < b_h; y++){
2222 for(x=0; x < b_w; x++){
2223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2232 if(!p || p->fast_mc)
2233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2236 if(am&(~255)) am= ~(am>>31);
2245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2250 hpel[ 6]= tmp2t[1] + 1;
2252 hpel[ 8]= src + stride;
2253 hpel[ 9]= hpel[1] + stride;
2254 hpel[10]= hpel[8] + 1;
2257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2263 for(y=0; y < b_h; y++){
2264 for(x=0; x < b_w; x++){
2265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2275 const uint8_t *src1= hpel[l];
2276 const uint8_t *src2= hpel[r];
2277 int a= weight[((dx&7) + (8*(dy&7)))];
2279 for(y=0; y < b_h; y++){
2280 for(x=0; x < b_w; x++){
2281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2290 #define mca(dx,dy,b_w)\
2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2307 if(block->type & BLOCK_INTRA){
2309 const int color = block->color[plane_index];
2310 const int color4= color*0x01010101;
2312 for(y=0; y < b_h; y++){
2313 *(uint32_t*)&dst[0 + y*stride]= color4;
2314 *(uint32_t*)&dst[4 + y*stride]= color4;
2315 *(uint32_t*)&dst[8 + y*stride]= color4;
2316 *(uint32_t*)&dst[12+ y*stride]= color4;
2317 *(uint32_t*)&dst[16+ y*stride]= color4;
2318 *(uint32_t*)&dst[20+ y*stride]= color4;
2319 *(uint32_t*)&dst[24+ y*stride]= color4;
2320 *(uint32_t*)&dst[28+ y*stride]= color4;
2323 for(y=0; y < b_h; y++){
2324 *(uint32_t*)&dst[0 + y*stride]= color4;
2325 *(uint32_t*)&dst[4 + y*stride]= color4;
2326 *(uint32_t*)&dst[8 + y*stride]= color4;
2327 *(uint32_t*)&dst[12+ y*stride]= color4;
2330 for(y=0; y < b_h; y++){
2331 *(uint32_t*)&dst[0 + y*stride]= color4;
2332 *(uint32_t*)&dst[4 + y*stride]= color4;
2335 for(y=0; y < b_h; y++){
2336 *(uint32_t*)&dst[0 + y*stride]= color4;
2339 for(y=0; y < b_h; y++){
2340 for(x=0; x < b_w; x++){
2341 dst[x + y*stride]= color;
2346 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2348 int mx= block->mx*scale;
2349 int my= block->my*scale;
2350 const int dx= mx&15;
2351 const int dy= my&15;
2352 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2353 sx += (mx>>4) - (HTAPS_MAX/2-1);
2354 sy += (my>>4) - (HTAPS_MAX/2-1);
2355 src += sx + sy*stride;
2356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2361 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2362 // assert(!(b_w&(b_w-1)));
2363 assert(b_w>1 && b_h>1);
2364 assert((tab_index>=0 && tab_index<4) || b_w==32);
2365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2369 for(y=0; y<b_h; y+=16){
2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2375 else if(b_w==2*b_h){
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2390 for(y=0; y<b_h; y++){
2391 //FIXME ugly misuse of obmc_stride
2392 const uint8_t *obmc1= obmc + y*obmc_stride;
2393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2396 dst = slice_buffer_get_line(sb, src_y + y);
2397 for(x=0; x<b_w; x++){
2398 int v= obmc1[x] * block[3][x + y*src_stride]
2399 +obmc2[x] * block[2][x + y*src_stride]
2400 +obmc3[x] * block[1][x + y*src_stride]
2401 +obmc4[x] * block[0][x + y*src_stride];
2403 v <<= 8 - LOG2_OBMC_MAX;
2405 v >>= 8 - FRAC_BITS;
2408 v += dst[x + src_x];
2409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2410 if(v&(~255)) v= ~(v>>31);
2411 dst8[x + y*src_stride] = v;
2413 dst[x + src_x] -= v;
2419 //FIXME name cleanup (b_w, block_w, b_width stuff)
2420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2421 const int b_width = s->b_width << s->block_max_depth;
2422 const int b_height= s->b_height << s->block_max_depth;
2423 const int b_stride= b_width;
2424 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2425 BlockNode *rt= lt+1;
2426 BlockNode *lb= lt+b_stride;
2427 BlockNode *rb= lb+1;
2429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2430 uint8_t *tmp = s->scratchbuf;
2437 }else if(b_x + 1 >= b_width){
2444 }else if(b_y + 1 >= b_height){
2449 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2452 if(!sliced && !offset_dst)
2455 }else if(src_x + b_w > w){
2459 obmc -= src_y*obmc_stride;
2461 if(!sliced && !offset_dst)
2462 dst -= src_y*dst_stride;
2464 }else if(src_y + b_h> h){
2468 if(b_w<=0 || b_h<=0) return;
2470 assert(src_stride > 2*MB_SIZE + 5);
2472 if(!sliced && offset_dst)
2473 dst += src_x + src_y*dst_stride;
2474 dst8+= src_x + src_y*src_stride;
2475 // src += src_x + src_y*src_stride;
2477 ptmp= tmp + 3*tmp_step;
2480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2482 if(same_block(lt, rt)){
2487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2490 if(same_block(lt, lb)){
2492 }else if(same_block(rt, lb)){
2497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2500 if(same_block(lt, rb) ){
2502 }else if(same_block(rt, rb)){
2504 }else if(same_block(lb, rb)){
2508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2511 for(y=0; y<b_h; y++){
2512 for(x=0; x<b_w; x++){
2513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2514 if(add) dst[x + y*dst_stride] += v;
2515 else dst[x + y*dst_stride] -= v;
2518 for(y=0; y<b_h; y++){
2519 uint8_t *obmc2= obmc + (obmc_stride>>1);
2520 for(x=0; x<b_w; x++){
2521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2522 if(add) dst[x + y*dst_stride] += v;
2523 else dst[x + y*dst_stride] -= v;
2526 for(y=0; y<b_h; y++){
2527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2528 for(x=0; x<b_w; x++){
2529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2530 if(add) dst[x + y*dst_stride] += v;
2531 else dst[x + y*dst_stride] -= v;
2534 for(y=0; y<b_h; y++){
2535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2536 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2537 for(x=0; x<b_w; x++){
2538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2539 if(add) dst[x + y*dst_stride] += v;
2540 else dst[x + y*dst_stride] -= v;
2545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2547 for(y=0; y<b_h; y++){
2548 //FIXME ugly misuse of obmc_stride
2549 const uint8_t *obmc1= obmc + y*obmc_stride;
2550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2553 for(x=0; x<b_w; x++){
2554 int v= obmc1[x] * block[3][x + y*src_stride]
2555 +obmc2[x] * block[2][x + y*src_stride]
2556 +obmc3[x] * block[1][x + y*src_stride]
2557 +obmc4[x] * block[0][x + y*src_stride];
2559 v <<= 8 - LOG2_OBMC_MAX;
2561 v >>= 8 - FRAC_BITS;
2564 v += dst[x + y*dst_stride];
2565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2566 if(v&(~255)) v= ~(v>>31);
2567 dst8[x + y*src_stride] = v;
2569 dst[x + y*dst_stride] -= v;
2577 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2578 Plane *p= &s->plane[plane_index];
2579 const int mb_w= s->b_width << s->block_max_depth;
2580 const int mb_h= s->b_height << s->block_max_depth;
2582 int block_size = MB_SIZE >> s->block_max_depth;
2583 int block_w = plane_index ? block_size/2 : block_size;
2584 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2585 int obmc_stride= plane_index ? block_size : 2*block_size;
2586 int ref_stride= s->current_picture.linesize[plane_index];
2587 uint8_t *dst8= s->current_picture.data[plane_index];
2591 if(s->keyframe || (s->avctx->debug&512)){
2596 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2597 // DWTELEM * line = slice_buffer_get_line(sb, y);
2598 IDWTELEM * line = sb->line[y];
2600 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2603 if(v&(~255)) v= ~(v>>31);
2604 dst8[x + y*ref_stride]= v;
2608 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2609 // DWTELEM * line = slice_buffer_get_line(sb, y);
2610 IDWTELEM * line = sb->line[y];
2612 line[x] -= 128 << FRAC_BITS;
2613 // buf[x + y*w]-= 128<<FRAC_BITS;
2621 for(mb_x=0; mb_x<=mb_w; mb_x++){
2622 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2623 block_w*mb_x - block_w/2,
2624 block_w*mb_y - block_w/2,
2627 w, ref_stride, obmc_stride,
2629 add, 0, plane_index);
2633 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2634 Plane *p= &s->plane[plane_index];
2635 const int mb_w= s->b_width << s->block_max_depth;
2636 const int mb_h= s->b_height << s->block_max_depth;
2638 int block_size = MB_SIZE >> s->block_max_depth;
2639 int block_w = plane_index ? block_size/2 : block_size;
2640 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2641 const int obmc_stride= plane_index ? block_size : 2*block_size;
2642 int ref_stride= s->current_picture.linesize[plane_index];
2643 uint8_t *dst8= s->current_picture.data[plane_index];
2647 if(s->keyframe || (s->avctx->debug&512)){
2652 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2654 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2656 if(v&(~255)) v= ~(v>>31);
2657 dst8[x + y*ref_stride]= v;
2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2663 buf[x + y*w]-= 128<<FRAC_BITS;
2671 for(mb_x=0; mb_x<=mb_w; mb_x++){
2672 add_yblock(s, 0, NULL, buf, dst8, obmc,
2673 block_w*mb_x - block_w/2,
2674 block_w*mb_y - block_w/2,
2677 w, ref_stride, obmc_stride,
2679 add, 1, plane_index);
2683 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2684 const int mb_h= s->b_height << s->block_max_depth;
2686 for(mb_y=0; mb_y<=mb_h; mb_y++)
2687 predict_slice(s, buf, plane_index, add, mb_y);
2690 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2692 Plane *p= &s->plane[plane_index];
2693 const int block_size = MB_SIZE >> s->block_max_depth;
2694 const int block_w = plane_index ? block_size/2 : block_size;
2695 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2696 const int obmc_stride= plane_index ? block_size : 2*block_size;
2697 const int ref_stride= s->current_picture.linesize[plane_index];
2698 uint8_t *src= s-> input_picture.data[plane_index];
2699 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2700 const int b_stride = s->b_width << s->block_max_depth;
2701 const int w= p->width;
2702 const int h= p->height;
2703 int index= mb_x + mb_y*b_stride;
2704 BlockNode *b= &s->block[index];
2705 BlockNode backup= *b;
2709 b->type|= BLOCK_INTRA;
2710 b->color[plane_index]= 0;
2711 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2714 int mb_x2= mb_x + (i &1) - 1;
2715 int mb_y2= mb_y + (i>>1) - 1;
2716 int x= block_w*mb_x2 + block_w/2;
2717 int y= block_w*mb_y2 + block_w/2;
2719 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2720 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2722 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2723 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2724 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2725 int obmc_v= obmc[index];
2727 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2728 if(x<0) obmc_v += obmc[index + block_w];
2729 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2730 if(x+block_w>w) obmc_v += obmc[index - block_w];
2731 //FIXME precalculate this or simplify it somehow else
2733 d = -dst[index] + (1<<(FRAC_BITS-1));
2735 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2736 aa += obmc_v * obmc_v; //FIXME precalculate this
2742 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2745 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2746 const int b_stride = s->b_width << s->block_max_depth;
2747 const int b_height = s->b_height<< s->block_max_depth;
2748 int index= x + y*b_stride;
2749 const BlockNode *b = &s->block[index];
2750 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2751 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2752 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2753 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2755 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2756 // int my_context= av_log2(2*FFABS(left->my - top->my));
2758 if(x<0 || x>=b_stride || y>=b_height)
2765 00001XXXX 15-30 8-15
2767 //FIXME try accurate rate
2768 //FIXME intra and inter predictors if surrounding blocks are not the same type
2769 if(b->type & BLOCK_INTRA){
2770 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2771 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2772 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2774 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2777 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2778 + av_log2(2*FFABS(dmy))
2779 + av_log2(2*b->ref));
2783 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2784 Plane *p= &s->plane[plane_index];
2785 const int block_size = MB_SIZE >> s->block_max_depth;
2786 const int block_w = plane_index ? block_size/2 : block_size;
2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
2788 const int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *dst= s->current_picture.data[plane_index];
2790 uint8_t *src= s-> input_picture.data[plane_index];
2791 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2792 uint8_t *cur = s->scratchbuf;
2793 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2794 const int b_stride = s->b_width << s->block_max_depth;
2795 const int b_height = s->b_height<< s->block_max_depth;
2796 const int w= p->width;
2797 const int h= p->height;
2800 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2801 int sx= block_w*mb_x - block_w/2;
2802 int sy= block_w*mb_y - block_w/2;
2803 int x0= FFMAX(0,-sx);
2804 int y0= FFMAX(0,-sy);
2805 int x1= FFMIN(block_w*2, w-sx);
2806 int y1= FFMIN(block_w*2, h-sy);
2809 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2811 for(y=y0; y<y1; y++){
2812 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2813 const IDWTELEM *pred1 = pred + y*obmc_stride;
2814 uint8_t *cur1 = cur + y*ref_stride;
2815 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2816 for(x=x0; x<x1; x++){
2817 #if FRAC_BITS >= LOG2_OBMC_MAX
2818 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2820 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2822 v = (v + pred1[x]) >> FRAC_BITS;
2823 if(v&(~255)) v= ~(v>>31);
2828 /* copy the regions where obmc[] = (uint8_t)256 */
2829 if(LOG2_OBMC_MAX == 8
2830 && (mb_x == 0 || mb_x == b_stride-1)
2831 && (mb_y == 0 || mb_y == b_height-1)){
2840 for(y=y0; y<y1; y++)
2841 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2845 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2846 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2847 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2848 * So improving the score of one block is not strictly guaranteed
2849 * to improve the score of the whole frame, thus iterative motion
2850 * estimation does not always converge. */
2851 if(s->avctx->me_cmp == FF_CMP_W97)
2852 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2853 else if(s->avctx->me_cmp == FF_CMP_W53)
2854 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2858 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2859 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2864 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2873 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2875 if(mb_x == b_stride-2)
2876 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2878 return distortion + rate*penalty_factor;
2881 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2883 Plane *p= &s->plane[plane_index];
2884 const int block_size = MB_SIZE >> s->block_max_depth;
2885 const int block_w = plane_index ? block_size/2 : block_size;
2886 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2887 const int obmc_stride= plane_index ? block_size : 2*block_size;
2888 const int ref_stride= s->current_picture.linesize[plane_index];
2889 uint8_t *dst= s->current_picture.data[plane_index];
2890 uint8_t *src= s-> input_picture.data[plane_index];
2891 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2892 // const has only been removed from zero_dst to suppress a warning
2893 static IDWTELEM zero_dst[4096]; //FIXME
2894 const int b_stride = s->b_width << s->block_max_depth;
2895 const int w= p->width;
2896 const int h= p->height;
2899 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2902 int mb_x2= mb_x + (i%3) - 1;
2903 int mb_y2= mb_y + (i/3) - 1;
2904 int x= block_w*mb_x2 + block_w/2;
2905 int y= block_w*mb_y2 + block_w/2;
2907 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2908 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2910 //FIXME find a cleaner/simpler way to skip the outside stuff
2911 for(y2= y; y2<0; y2++)
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2913 for(y2= h; y2<y+block_w; y2++)
2914 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2916 for(y2= y; y2<y+block_w; y2++)
2917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2920 for(y2= y; y2<y+block_w; y2++)
2921 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2924 assert(block_w== 8 || block_w==16);
2925 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2929 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2930 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2938 rate = get_block_bits(s, mb_x, mb_y, 2);
2939 for(i=merged?4:0; i<9; i++){
2940 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2941 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2944 return distortion + rate*penalty_factor;
2947 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2948 const int b_stride= s->b_width << s->block_max_depth;
2949 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2950 BlockNode backup= *block;
2951 int rd, index, value;
2953 assert(mb_x>=0 && mb_y>=0);
2954 assert(mb_x<b_stride);
2957 block->color[0] = p[0];
2958 block->color[1] = p[1];
2959 block->color[2] = p[2];
2960 block->type |= BLOCK_INTRA;
2962 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2963 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2964 if(s->me_cache[index] == value)
2966 s->me_cache[index]= value;
2970 block->type &= ~BLOCK_INTRA;
2973 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2985 /* special case for int[2] args we discard afterwards,
2986 * fixes compilation problem with gcc 2.95 */
2987 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2988 int p[2] = {p0, p1};
2989 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2992 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2993 const int b_stride= s->b_width << s->block_max_depth;
2994 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2995 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2996 int rd, index, value;
2998 assert(mb_x>=0 && mb_y>=0);
2999 assert(mb_x<b_stride);
3000 assert(((mb_x|mb_y)&1) == 0);
3002 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3003 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3004 if(s->me_cache[index] == value)
3006 s->me_cache[index]= value;
3011 block->type &= ~BLOCK_INTRA;
3012 block[1]= block[b_stride]= block[b_stride+1]= *block;
3014 rd= get_4block_rd(s, mb_x, mb_y, 0);
3021 block[0]= backup[0];
3022 block[1]= backup[1];
3023 block[b_stride]= backup[2];
3024 block[b_stride+1]= backup[3];
3029 static void iterative_me(SnowContext *s){
3030 int pass, mb_x, mb_y;
3031 const int b_width = s->b_width << s->block_max_depth;
3032 const int b_height= s->b_height << s->block_max_depth;
3033 const int b_stride= b_width;
3037 RangeCoder r = s->c;
3038 uint8_t state[sizeof(s->block_state)];
3039 memcpy(state, s->block_state, sizeof(s->block_state));
3040 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3041 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3042 encode_q_branch(s, 0, mb_x, mb_y);
3044 memcpy(s->block_state, state, sizeof(s->block_state));
3047 for(pass=0; pass<25; pass++){
3050 for(mb_y= 0; mb_y<b_height; mb_y++){
3051 for(mb_x= 0; mb_x<b_width; mb_x++){
3052 int dia_change, i, j, ref;
3053 int best_rd= INT_MAX, ref_rd;
3054 BlockNode backup, ref_b;
3055 const int index= mb_x + mb_y * b_stride;
3056 BlockNode *block= &s->block[index];
3057 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3058 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3059 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3060 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3061 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3062 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3063 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3064 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3065 const int b_w= (MB_SIZE >> s->block_max_depth);
3066 uint8_t obmc_edged[b_w*2][b_w*2];
3068 if(pass && (block->type & BLOCK_OPT))
3070 block->type |= BLOCK_OPT;
3074 if(!s->me_cache_generation)
3075 memset(s->me_cache, 0, sizeof(s->me_cache));
3076 s->me_cache_generation += 1<<22;
3078 //FIXME precalculate
3081 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3083 for(y=0; y<b_w*2; y++)
3084 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3085 if(mb_x==b_stride-1)
3086 for(y=0; y<b_w*2; y++)
3087 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3089 for(x=0; x<b_w*2; x++)
3090 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3091 for(y=1; y<b_w; y++)
3092 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3094 if(mb_y==b_height-1){
3095 for(x=0; x<b_w*2; x++)
3096 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3097 for(y=b_w; y<b_w*2-1; y++)
3098 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3102 //skip stuff outside the picture
3103 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3104 uint8_t *src= s-> input_picture.data[0];
3105 uint8_t *dst= s->current_picture.data[0];
3106 const int stride= s->current_picture.linesize[0];
3107 const int block_w= MB_SIZE >> s->block_max_depth;
3108 const int sx= block_w*mb_x - block_w/2;
3109 const int sy= block_w*mb_y - block_w/2;
3110 const int w= s->plane[0].width;
3111 const int h= s->plane[0].height;
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3116 for(y=h; y<sy+block_w*2; y++)
3117 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3119 for(y=sy; y<sy+block_w*2; y++)
3120 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3122 if(sx+block_w*2 > w){
3123 for(y=sy; y<sy+block_w*2; y++)
3124 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3128 // intra(black) = neighbors' contribution to the current block
3130 color[i]= get_dc(s, mb_x, mb_y, i);
3132 // get previous score (cannot be cached due to OBMC)
3133 if(pass > 0 && (block->type&BLOCK_INTRA)){
3134 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3135 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3137 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3141 for(ref=0; ref < s->ref_frames; ref++){
3142 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3143 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3148 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3149 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3151 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3153 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3155 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3157 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3160 //FIXME avoid subpel interpolation / round to nearest integer
3163 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3168 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3174 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3177 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3179 //FIXME or try the standard 2 pass qpel or similar
3181 mvr[0][0]= block->mx;
3182 mvr[0][1]= block->my;
3183 if(ref_rd > best_rd){
3191 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3192 //FIXME RD style color selection
3194 if(!same_block(block, &backup)){
3195 if(tb ) tb ->type &= ~BLOCK_OPT;
3196 if(lb ) lb ->type &= ~BLOCK_OPT;
3197 if(rb ) rb ->type &= ~BLOCK_OPT;
3198 if(bb ) bb ->type &= ~BLOCK_OPT;
3199 if(tlb) tlb->type &= ~BLOCK_OPT;
3200 if(trb) trb->type &= ~BLOCK_OPT;
3201 if(blb) blb->type &= ~BLOCK_OPT;
3202 if(brb) brb->type &= ~BLOCK_OPT;
3207 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3212 if(s->block_max_depth == 1){
3214 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3215 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3217 int best_rd, init_rd;
3218 const int index= mb_x + mb_y * b_stride;
3221 b[0]= &s->block[index];
3223 b[2]= b[0]+b_stride;
3225 if(same_block(b[0], b[1]) &&
3226 same_block(b[0], b[2]) &&
3227 same_block(b[0], b[3]))
3230 if(!s->me_cache_generation)
3231 memset(s->me_cache, 0, sizeof(s->me_cache));
3232 s->me_cache_generation += 1<<22;
3234 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3236 //FIXME more multiref search?
3237 check_4block_inter(s, mb_x, mb_y,
3238 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3239 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3242 if(!(b[i]->type&BLOCK_INTRA))
3243 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3245 if(init_rd != best_rd)
3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3254 const int w= b->width;
3255 const int h= b->height;
3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3258 int x,y, thres1, thres2;
3260 if(s->qlog == LOSSLESS_QLOG){
3263 dst[x + y*stride]= src[x + y*stride];
3267 bias= bias ? 0 : (3*qmul)>>3;
3268 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3274 int i= src[x + y*stride];
3276 if((unsigned)(i+thres1) > thres2){
3279 i/= qmul; //FIXME optimize
3280 dst[x + y*stride]= i;
3284 i/= qmul; //FIXME optimize
3285 dst[x + y*stride]= -i;
3288 dst[x + y*stride]= 0;
3294 int i= src[x + y*stride];
3296 if((unsigned)(i+thres1) > thres2){
3299 i= (i + bias) / qmul; //FIXME optimize
3300 dst[x + y*stride]= i;
3304 i= (i + bias) / qmul; //FIXME optimize
3305 dst[x + y*stride]= -i;
3308 dst[x + y*stride]= 0;
3314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3315 const int w= b->width;
3316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3321 if(s->qlog == LOSSLESS_QLOG) return;
3323 for(y=start_y; y<end_y; y++){
3324 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3325 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3329 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3338 const int w= b->width;
3339 const int h= b->height;
3340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3345 if(s->qlog == LOSSLESS_QLOG) return;
3349 int i= src[x + y*stride];
3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3360 const int w= b->width;
3361 const int h= b->height;
3364 for(y=h-1; y>=0; y--){
3365 for(x=w-1; x>=0; x--){
3366 int i= x + y*stride;
3370 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3371 else src[i] -= src[i - 1];
3373 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3374 else src[i] -= src[i - 1];
3377 if(y) src[i] -= src[i - stride];
3383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3384 const int w= b->width;
3387 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3391 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3393 for(y=start_y; y<end_y; y++){
3395 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3396 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3400 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3401 else line[x] += line[x - 1];
3403 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3404 else line[x] += line[x - 1];
3407 if(y) line[x] += prev[x];
3413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3414 const int w= b->width;
3415 const int h= b->height;
3420 int i= x + y*stride;
3424 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3425 else src[i] += src[i - 1];
3427 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3428 else src[i] += src[i - 1];
3431 if(y) src[i] += src[i - stride];
3437 static void encode_qlogs(SnowContext *s){
3438 int plane_index, level, orientation;
3440 for(plane_index=0; plane_index<2; plane_index++){
3441 for(level=0; level<s->spatial_decomposition_count; level++){
3442 for(orientation=level ? 1:0; orientation<4; orientation++){
3443 if(orientation==2) continue;
3444 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3450 static void encode_header(SnowContext *s){
3454 memset(kstate, MID_STATE, sizeof(kstate));
3456 put_rac(&s->c, kstate, s->keyframe);
3457 if(s->keyframe || s->always_reset){
3459 s->last_spatial_decomposition_type=
3463 s->last_block_max_depth= 0;
3464 for(plane_index=0; plane_index<2; plane_index++){
3465 Plane *p= &s->plane[plane_index];
3468 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3472 put_symbol(&s->c, s->header_state, s->version, 0);
3473 put_rac(&s->c, s->header_state, s->always_reset);
3474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3475 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3476 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3477 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3478 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3479 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3480 put_rac(&s->c, s->header_state, s->spatial_scalability);
3481 // put_rac(&s->c, s->header_state, s->rate_scalability);
3482 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3489 for(plane_index=0; plane_index<2; plane_index++){
3490 Plane *p= &s->plane[plane_index];
3491 update_mc |= p->last_htaps != p->htaps;
3492 update_mc |= p->last_diag_mc != p->diag_mc;
3493 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3495 put_rac(&s->c, s->header_state, update_mc);
3497 for(plane_index=0; plane_index<2; plane_index++){
3498 Plane *p= &s->plane[plane_index];
3499 put_rac(&s->c, s->header_state, p->diag_mc);
3500 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3501 for(i= p->htaps/2; i; i--)
3502 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3505 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3506 put_rac(&s->c, s->header_state, 1);
3507 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3510 put_rac(&s->c, s->header_state, 0);
3513 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3514 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3515 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3516 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3517 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3521 static void update_last_header_values(SnowContext *s){
3525 for(plane_index=0; plane_index<2; plane_index++){
3526 Plane *p= &s->plane[plane_index];
3527 p->last_diag_mc= p->diag_mc;
3528 p->last_htaps = p->htaps;
3529 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3533 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3534 s->last_qlog = s->qlog;
3535 s->last_qbias = s->qbias;
3536 s->last_mv_scale = s->mv_scale;
3537 s->last_block_max_depth = s->block_max_depth;
3538 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3541 static void decode_qlogs(SnowContext *s){
3542 int plane_index, level, orientation;
3544 for(plane_index=0; plane_index<3; plane_index++){
3545 for(level=0; level<s->spatial_decomposition_count; level++){
3546 for(orientation=level ? 1:0; orientation<4; orientation++){
3548 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3549 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3550 else q= get_symbol(&s->c, s->header_state, 1);
3551 s->plane[plane_index].band[level][orientation].qlog= q;
3557 #define GET_S(dst, check) \
3558 tmp= get_symbol(&s->c, s->header_state, 0);\
3560 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
3565 static int decode_header(SnowContext *s){
3566 int plane_index, tmp;
3569 memset(kstate, MID_STATE, sizeof(kstate));
3571 s->keyframe= get_rac(&s->c, kstate);
3572 if(s->keyframe || s->always_reset){
3574 s->spatial_decomposition_type=
3578 s->block_max_depth= 0;
3581 GET_S(s->version, tmp <= 0U)
3582 s->always_reset= get_rac(&s->c, s->header_state);
3583 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3584 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3585 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3586 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3587 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3588 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3589 s->spatial_scalability= get_rac(&s->c, s->header_state);
3590 // s->rate_scalability= get_rac(&s->c, s->header_state);
3591 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3592 s->max_ref_frames++;
3598 if(get_rac(&s->c, s->header_state)){
3599 for(plane_index=0; plane_index<2; plane_index++){
3600 int htaps, i, sum=0;
3601 Plane *p= &s->plane[plane_index];
3602 p->diag_mc= get_rac(&s->c, s->header_state);
3603 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3604 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3607 for(i= htaps/2; i; i--){
3608 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3609 sum += p->hcoeff[i];
3611 p->hcoeff[0]= 32-sum;
3613 s->plane[2].diag_mc= s->plane[1].diag_mc;
3614 s->plane[2].htaps = s->plane[1].htaps;
3615 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3617 if(get_rac(&s->c, s->header_state)){
3618 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3623 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3624 if(s->spatial_decomposition_type > 1U){
3625 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3628 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3629 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3630 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
3634 s->qlog += get_symbol(&s->c, s->header_state, 1);
3635 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3636 s->qbias += get_symbol(&s->c, s->header_state, 1);
3637 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3638 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3639 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3640 s->block_max_depth= 0;
3647 static void init_qexp(void){
3651 for(i=0; i<QROOT; i++){
3653 v *= pow(2, 1.0 / QROOT);
3657 static av_cold int common_init(AVCodecContext *avctx){
3658 SnowContext *s = avctx->priv_data;
3663 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
3665 dsputil_init(&s->dsp, avctx);
3668 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3669 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3670 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3671 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3672 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3673 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3692 #define mcfh(dx,dy)\
3693 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3694 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3695 mc_block_hpel ## dx ## dy ## 16;\
3696 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3697 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3698 mc_block_hpel ## dx ## dy ## 8;
3708 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3710 width= s->avctx->width;
3711 height= s->avctx->height;
3713 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3714 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3716 for(i=0; i<MAX_REF_FRAMES; i++)
3717 for(j=0; j<MAX_REF_FRAMES; j++)
3718 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3720 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3721 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3726 static int common_init_after_header(AVCodecContext *avctx){
3727 SnowContext *s = avctx->priv_data;
3728 int plane_index, level, orientation;
3730 for(plane_index=0; plane_index<3; plane_index++){
3731 int w= s->avctx->width;
3732 int h= s->avctx->height;
3735 w>>= s->chroma_h_shift;
3736 h>>= s->chroma_v_shift;
3738 s->plane[plane_index].width = w;
3739 s->plane[plane_index].height= h;
3741 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3742 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3743 SubBand *b= &s->plane[plane_index].band[level][orientation];
3745 b->buf= s->spatial_dwt_buffer;
3747 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3748 b->width = (w + !(orientation&1))>>1;
3749 b->height= (h + !(orientation>1))>>1;
3751 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3752 b->buf_x_offset = 0;
3753 b->buf_y_offset = 0;
3757 b->buf_x_offset = (w+1)>>1;
3760 b->buf += b->stride>>1;
3761 b->buf_y_offset = b->stride_line >> 1;
3763 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3766 b->parent= &s->plane[plane_index].band[level-1][orientation];
3767 //FIXME avoid this realloc
3768 av_freep(&b->x_coeff);
3769 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3779 static int qscale2qlog(int qscale){
3780 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3781 + 61*QROOT/8; //<64 >60
3784 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3786 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3787 * FIXME we know exact mv bits at this point,
3788 * but ratecontrol isn't set up to include them. */
3789 uint32_t coef_sum= 0;
3790 int level, orientation, delta_qlog;
3792 for(level=0; level<s->spatial_decomposition_count; level++){
3793 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3794 SubBand *b= &s->plane[0].band[level][orientation];
3795 IDWTELEM *buf= b->ibuf;
3796 const int w= b->width;
3797 const int h= b->height;
3798 const int stride= b->stride;
3799 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3800 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3801 const int qdiv= (1<<16)/qmul;
3803 //FIXME this is ugly
3806 buf[x+y*stride]= b->buf[x+y*stride];
3808 decorrelate(s, b, buf, stride, 1, 0);
3811 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3815 /* ugly, ratecontrol just takes a sqrt again */
3816 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3817 assert(coef_sum < INT_MAX);
3819 if(pict->pict_type == FF_I_TYPE){
3820 s->m.current_picture.mb_var_sum= coef_sum;
3821 s->m.current_picture.mc_mb_var_sum= 0;
3823 s->m.current_picture.mc_mb_var_sum= coef_sum;
3824 s->m.current_picture.mb_var_sum= 0;
3827 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3828 if (pict->quality < 0)
3830 s->lambda= pict->quality * 3/2;
3831 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3832 s->qlog+= delta_qlog;
3836 static void calculate_visual_weight(SnowContext *s, Plane *p){
3837 int width = p->width;
3838 int height= p->height;
3839 int level, orientation, x, y;
3841 for(level=0; level<s->spatial_decomposition_count; level++){
3842 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3843 SubBand *b= &p->band[level][orientation];
3844 IDWTELEM *ibuf= b->ibuf;
3847 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3848 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3849 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3850 for(y=0; y<height; y++){
3851 for(x=0; x<width; x++){
3852 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3857 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3867 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3868 SubBand *b= &p->band[level][orientation];
3872 int step= 1 << (s->spatial_decomposition_count - level);
3879 //FIXME bias for nonzero ?
3881 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3882 for(y=0; y<p->height; y++){
3883 for(x=0; x<p->width; x++){
3884 int sx= (x-xo + step/2) / step / Q2_STEP;
3885 int sy= (y-yo + step/2) / step / Q2_STEP;
3886 int v= r0[x + y*p->width] - r1[x + y*p->width];
3887 assert(sx>=0 && sy>=0 && sx < score_stride);
3889 score[sx + sy*score_stride] += v*v;
3890 assert(score[sx + sy*score_stride] >= 0);
3895 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3896 int level, orientation;
3898 for(level=0; level<s->spatial_decomposition_count; level++){
3899 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3900 SubBand *b= &p->band[level][orientation];
3901 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3903 dequantize(s, b, dst, b->stride);
3908 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3909 int level, orientation, ys, xs, x, y, pass;
3910 IDWTELEM best_dequant[height * stride];
3911 IDWTELEM idwt2_buffer[height * stride];
3912 const int score_stride= (width + 10)/Q2_STEP;
3913 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3914 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3915 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3917 //FIXME pass the copy cleanly ?
3919 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3920 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3922 for(level=0; level<s->spatial_decomposition_count; level++){
3923 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3924 SubBand *b= &p->band[level][orientation];
3925 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3926 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3927 assert(src == b->buf); // code does not depend on this but it is true currently
3929 quantize(s, b, dst, src, b->stride, s->qbias);
3932 for(pass=0; pass<1; pass++){
3933 if(s->qbias == 0) //keyframe
3935 for(level=0; level<s->spatial_decomposition_count; level++){
3936 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3937 SubBand *b= &p->band[level][orientation];
3938 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3939 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3941 for(ys= 0; ys<Q2_STEP; ys++){
3942 for(xs= 0; xs<Q2_STEP; xs++){
3943 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3944 dequantize_all(s, p, idwt2_buffer, width, height);
3945 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3946 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3947 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3948 for(y=ys; y<b->height; y+= Q2_STEP){
3949 for(x=xs; x<b->width; x+= Q2_STEP){
3950 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3951 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3952 //FIXME try more than just --
3955 dequantize_all(s, p, idwt2_buffer, width, height);
3956 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3957 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3958 for(y=ys; y<b->height; y+= Q2_STEP){
3959 for(x=xs; x<b->width; x+= Q2_STEP){
3960 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3961 if(score[score_idx] <= best_score[score_idx] + threshold){
3962 best_score[score_idx]= score[score_idx];
3963 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3964 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3965 //FIXME copy instead
3974 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3977 #endif /* QUANTIZE2==1 */
3979 static av_cold int encode_init(AVCodecContext *avctx)
3981 SnowContext *s = avctx->priv_data;
3984 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3985 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3986 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3990 if(avctx->prediction_method == DWT_97
3991 && (avctx->flags & CODEC_FLAG_QSCALE)
3992 && avctx->global_quality == 0){
3993 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3997 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3999 s->chroma_h_shift= 1; //FIXME XXX
4000 s->chroma_v_shift= 1;
4002 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
4003 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
4005 for(plane_index=0; plane_index<3; plane_index++){
4006 s->plane[plane_index].diag_mc= 1;
4007 s->plane[plane_index].htaps= 6;
4008 s->plane[plane_index].hcoeff[0]= 40;
4009 s->plane[plane_index].hcoeff[1]= -10;
4010 s->plane[plane_index].hcoeff[2]= 2;
4011 s->plane[plane_index].fast_mc= 1;
4020 s->m.flags = avctx->flags;
4021 s->m.bit_rate= avctx->bit_rate;
4024 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4025 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4026 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4027 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4028 h263_encode_init(&s->m); //mv_penalty
4030 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4032 if(avctx->flags&CODEC_FLAG_PASS1){
4033 if(!avctx->stats_out)
4034 avctx->stats_out = av_mallocz(256);
4036 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4037 if(ff_rate_control_init(&s->m) < 0)
4040 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4042 avctx->coded_frame= &s->current_picture;
4043 switch(avctx->pix_fmt){
4044 // case PIX_FMT_YUV444P:
4045 // case PIX_FMT_YUV422P:
4046 case PIX_FMT_YUV420P:
4048 // case PIX_FMT_YUV411P:
4049 // case PIX_FMT_YUV410P:
4050 s->colorspace_type= 0;
4052 /* case PIX_FMT_RGB32:
4056 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4059 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4060 s->chroma_h_shift= 1;
4061 s->chroma_v_shift= 1;
4063 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4064 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4066 s->avctx->get_buffer(s->avctx, &s->input_picture);
4068 if(s->avctx->me_method == ME_ITER){
4070 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4071 for(i=0; i<s->max_ref_frames; i++){
4072 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4073 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4080 #define USE_HALFPEL_PLANE 0
4082 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4085 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4089 int w= s->avctx->width >>is_chroma;
4090 int h= s->avctx->height >>is_chroma;
4091 int ls= frame->linesize[p];
4092 uint8_t *src= frame->data[p];
4094 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4095 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4096 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4103 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4110 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4118 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4126 static void release_buffer(AVCodecContext *avctx){
4127 SnowContext *s = avctx->priv_data;
4130 if(s->last_picture[s->max_ref_frames-1].data[0]){
4131 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4133 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4134 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4138 static int frame_start(SnowContext *s){
4140 int w= s->avctx->width; //FIXME round up to x16 ?
4141 int h= s->avctx->height;
4143 if(s->current_picture.data[0]){
4144 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4145 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4146 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4149 tmp= s->last_picture[s->max_ref_frames-1];
4150 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4151 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4152 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4153 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4154 s->last_picture[0]= s->current_picture;
4155 s->current_picture= tmp;
4161 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4162 if(i && s->last_picture[i-1].key_frame)
4165 if(s->ref_frames==0){
4166 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
4171 s->current_picture.reference= 1;
4172 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4173 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4177 s->current_picture.key_frame= s->keyframe;
4182 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4183 SnowContext *s = avctx->priv_data;
4184 RangeCoder * const c= &s->c;
4185 AVFrame *pict = data;
4186 const int width= s->avctx->width;
4187 const int height= s->avctx->height;
4188 int level, orientation, plane_index, i, y;
4189 uint8_t rc_header_bak[sizeof(s->header_state)];
4190 uint8_t rc_block_bak[sizeof(s->block_state)];
4192 ff_init_range_encoder(c, buf, buf_size);
4193 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4197 for(y=0; y<(height>>shift); y++)
4198 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4199 &pict->data[i][y * pict->linesize[i]],
4202 s->new_picture = *pict;
4204 s->m.picture_number= avctx->frame_number;
4205 if(avctx->flags&CODEC_FLAG_PASS2){
4207 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4208 s->keyframe= pict->pict_type==FF_I_TYPE;
4209 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4210 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4211 if (pict->quality < 0)
4215 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4217 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4220 if(s->pass1_rc && avctx->frame_number == 0)
4221 pict->quality= 2*FF_QP2LAMBDA;
4223 s->qlog= qscale2qlog(pict->quality);
4224 s->lambda = pict->quality * 3/2;
4226 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4227 s->qlog= LOSSLESS_QLOG;
4229 }//else keep previous frame's qlog until after motion estimation
4233 s->m.current_picture_ptr= &s->m.current_picture;
4234 if(pict->pict_type == FF_P_TYPE){
4235 int block_width = (width +15)>>4;
4236 int block_height= (height+15)>>4;
4237 int stride= s->current_picture.linesize[0];
4239 assert(s->current_picture.data[0]);
4240 assert(s->last_picture[0].data[0]);
4242 s->m.avctx= s->avctx;
4243 s->m.current_picture.data[0]= s->current_picture.data[0];
4244 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4245 s->m. new_picture.data[0]= s-> input_picture.data[0];
4246 s->m. last_picture_ptr= &s->m. last_picture;
4248 s->m. last_picture.linesize[0]=
4249 s->m. new_picture.linesize[0]=
4250 s->m.current_picture.linesize[0]= stride;
4251 s->m.uvlinesize= s->current_picture.linesize[1];
4253 s->m.height= height;
4254 s->m.mb_width = block_width;
4255 s->m.mb_height= block_height;
4256 s->m.mb_stride= s->m.mb_width+1;
4257 s->m.b8_stride= 2*s->m.mb_width+1;
4259 s->m.pict_type= pict->pict_type;
4260 s->m.me_method= s->avctx->me_method;
4261 s->m.me.scene_change_score=0;
4262 s->m.flags= s->avctx->flags;
4263 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4264 s->m.out_format= FMT_H263;
4265 s->m.unrestricted_mv= 1;
4267 s->m.lambda = s->lambda;
4268 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4269 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4271 s->m.dsp= s->dsp; //move
4277 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4278 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4283 if(pict->pict_type == FF_I_TYPE)
4284 s->spatial_decomposition_count= 5;
4286 s->spatial_decomposition_count= 5;
4288 s->m.pict_type = pict->pict_type;
4289 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4291 common_init_after_header(avctx);
4293 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4294 for(plane_index=0; plane_index<3; plane_index++){
4295 calculate_visual_weight(s, &s->plane[plane_index]);
4300 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4301 encode_blocks(s, 1);
4302 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4304 for(plane_index=0; plane_index<3; plane_index++){
4305 Plane *p= &s->plane[plane_index];
4309 // int bits= put_bits_count(&s->c.pb);
4311 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4313 if(pict->data[plane_index]) //FIXME gray hack
4316 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4319 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4322 && pict->pict_type == FF_P_TYPE
4323 && !(avctx->flags&CODEC_FLAG_PASS2)
4324 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4325 ff_init_range_encoder(c, buf, buf_size);
4326 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4327 pict->pict_type= FF_I_TYPE;
4329 s->current_picture.key_frame=1;
4333 if(s->qlog == LOSSLESS_QLOG){
4336 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4342 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4348 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4350 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4352 if(s->pass1_rc && plane_index==0){
4353 int delta_qlog = ratecontrol_1pass(s, pict);
4354 if (delta_qlog <= INT_MIN)
4357 //reordering qlog in the bitstream would eliminate this reset
4358 ff_init_range_encoder(c, buf, buf_size);
4359 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4360 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4362 encode_blocks(s, 0);
4366 for(level=0; level<s->spatial_decomposition_count; level++){
4367 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4368 SubBand *b= &p->band[level][orientation];
4371 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4373 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4374 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4375 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4377 correlate(s, b, b->ibuf, b->stride, 1, 0);
4381 for(level=0; level<s->spatial_decomposition_count; level++){
4382 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4383 SubBand *b= &p->band[level][orientation];
4385 dequantize(s, b, b->ibuf, b->stride);
4389 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4390 if(s->qlog == LOSSLESS_QLOG){
4393 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4397 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4400 if(pict->pict_type == FF_I_TYPE){
4403 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4404 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4408 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4409 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4412 if(s->avctx->flags&CODEC_FLAG_PSNR){
4415 if(pict->data[plane_index]) //FIXME gray hack
4418 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4422 s->avctx->error[plane_index] += error;
4423 s->current_picture.error[plane_index] = error;
4428 update_last_header_values(s);
4430 release_buffer(avctx);
4432 s->current_picture.coded_picture_number = avctx->frame_number;
4433 s->current_picture.pict_type = pict->pict_type;
4434 s->current_picture.quality = pict->quality;
4435 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4436 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4437 s->m.current_picture.display_picture_number =
4438 s->m.current_picture.coded_picture_number = avctx->frame_number;
4439 s->m.current_picture.quality = pict->quality;
4440 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4442 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4444 if(avctx->flags&CODEC_FLAG_PASS1)
4445 ff_write_pass1_stats(&s->m);
4446 s->m.last_pict_type = s->m.pict_type;
4447 avctx->frame_bits = s->m.frame_bits;
4448 avctx->mv_bits = s->m.mv_bits;
4449 avctx->misc_bits = s->m.misc_bits;
4450 avctx->p_tex_bits = s->m.p_tex_bits;
4454 return ff_rac_terminate(c);
4457 static av_cold void common_end(SnowContext *s){
4458 int plane_index, level, orientation, i;
4460 av_freep(&s->spatial_dwt_buffer);
4461 av_freep(&s->spatial_idwt_buffer);
4464 av_freep(&s->m.me.scratchpad);
4465 av_freep(&s->m.me.map);
4466 av_freep(&s->m.me.score_map);
4467 av_freep(&s->m.obmc_scratchpad);
4469 av_freep(&s->block);
4470 av_freep(&s->scratchbuf);
4472 for(i=0; i<MAX_REF_FRAMES; i++){
4473 av_freep(&s->ref_mvs[i]);
4474 av_freep(&s->ref_scores[i]);
4475 if(s->last_picture[i].data[0])
4476 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4479 for(plane_index=0; plane_index<3; plane_index++){
4480 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4481 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4482 SubBand *b= &s->plane[plane_index].band[level][orientation];
4484 av_freep(&b->x_coeff);
4490 static av_cold int encode_end(AVCodecContext *avctx)
4492 SnowContext *s = avctx->priv_data;
4495 av_free(avctx->stats_out);
4500 static av_cold int decode_init(AVCodecContext *avctx)
4502 avctx->pix_fmt= PIX_FMT_YUV420P;
4509 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
4510 const uint8_t *buf = avpkt->data;
4511 int buf_size = avpkt->size;
4512 SnowContext *s = avctx->priv_data;
4513 RangeCoder * const c= &s->c;
4515 AVFrame *picture = data;
4516 int level, orientation, plane_index;
4518 ff_init_range_decoder(c, buf, buf_size);
4519 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4521 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4522 if(decode_header(s)<0)
4524 common_init_after_header(avctx);
4526 // realloc slice buffer for the case that spatial_decomposition_count changed
4527 slice_buffer_destroy(&s->sb);
4528 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4530 for(plane_index=0; plane_index<3; plane_index++){
4531 Plane *p= &s->plane[plane_index];
4532 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4533 && p->hcoeff[1]==-10
4539 if(frame_start(s) < 0)
4541 //keyframe flag duplication mess FIXME
4542 if(avctx->debug&FF_DEBUG_PICT_INFO)
4543 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4547 for(plane_index=0; plane_index<3; plane_index++){
4548 Plane *p= &s->plane[plane_index];
4552 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4554 if(s->avctx->debug&2048){
4555 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4556 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4560 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4561 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4567 for(level=0; level<s->spatial_decomposition_count; level++){
4568 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4569 SubBand *b= &p->band[level][orientation];
4570 unpack_coeffs(s, b, b->parent, orientation);
4576 const int mb_h= s->b_height << s->block_max_depth;
4577 const int block_size = MB_SIZE >> s->block_max_depth;
4578 const int block_w = plane_index ? block_size/2 : block_size;
4580 DWTCompose cs[MAX_DECOMPOSITIONS];
4585 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4586 for(mb_y=0; mb_y<=mb_h; mb_y++){
4588 int slice_starty = block_w*mb_y;
4589 int slice_h = block_w*(mb_y+1);
4590 if (!(s->keyframe || s->avctx->debug&512)){
4591 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4592 slice_h -= (block_w >> 1);
4595 for(level=0; level<s->spatial_decomposition_count; level++){
4596 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4597 SubBand *b= &p->band[level][orientation];
4600 int our_mb_start = mb_y;
4601 int our_mb_end = (mb_y + 1);
4603 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4604 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4605 if (!(s->keyframe || s->avctx->debug&512)){
4606 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4607 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4609 start_y = FFMIN(b->height, start_y);
4610 end_y = FFMIN(b->height, end_y);
4612 if (start_y != end_y){
4613 if (orientation == 0){
4614 SubBand * correlate_band = &p->band[0][0];
4615 int correlate_end_y = FFMIN(b->height, end_y + 1);
4616 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4617 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4618 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4619 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4622 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4627 for(; yd<slice_h; yd+=4){
4628 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4631 if(s->qlog == LOSSLESS_QLOG){
4632 for(; yq<slice_h && yq<h; yq++){
4633 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4635 line[x] <<= FRAC_BITS;
4640 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4642 y = FFMIN(p->height, slice_starty);
4643 end_y = FFMIN(p->height, slice_h);
4645 slice_buffer_release(&s->sb, y++);
4648 slice_buffer_flush(&s->sb);
4655 release_buffer(avctx);
4657 if(!(s->avctx->debug&2048))
4658 *picture= s->current_picture;
4660 *picture= s->mconly_picture;
4662 *data_size = sizeof(AVFrame);
4664 bytes_read= c->bytestream - c->bytestream_start;
4665 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4670 static av_cold int decode_end(AVCodecContext *avctx)
4672 SnowContext *s = avctx->priv_data;
4674 slice_buffer_destroy(&s->sb);
4681 AVCodec snow_decoder = {
4685 sizeof(SnowContext),
4690 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4692 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4695 #if CONFIG_SNOW_ENCODER
4696 AVCodec snow_encoder = {
4700 sizeof(SnowContext),
4704 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4714 #include "libavutil/lfg.h"
4719 int buffer[2][width*height];
4723 s.spatial_decomposition_count=6;
4724 s.spatial_decomposition_type=1;
4726 av_lfg_init(&prn, 1);
4728 printf("testing 5/3 DWT\n");
4729 for(i=0; i<width*height; i++)
4730 buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4732 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4733 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4735 for(i=0; i<width*height; i++)
4736 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4738 printf("testing 9/7 DWT\n");
4739 s.spatial_decomposition_type=0;
4740 for(i=0; i<width*height; i++)
4741 buffer[0][i] = buffer[1][i] = av_lfg_get(&prn) % 54321 - 12345;
4743 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4746 for(i=0; i<width*height; i++)
4747 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4750 printf("testing AC coder\n");
4751 memset(s.header_state, 0, sizeof(s.header_state));
4752 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4753 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4755 for(i=-256; i<256; i++){
4756 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4758 ff_rac_terminate(&s.c);
4760 memset(s.header_state, 0, sizeof(s.header_state));
4761 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4762 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4764 for(i=-256; i<256; i++){
4766 j= get_symbol(&s.c, s.header_state, 1);
4767 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4771 int level, orientation, x, y;
4772 int64_t errors[8][4];
4775 memset(errors, 0, sizeof(errors));
4776 s.spatial_decomposition_count=3;
4777 s.spatial_decomposition_type=0;
4778 for(level=0; level<s.spatial_decomposition_count; level++){
4779 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4780 int w= width >> (s.spatial_decomposition_count-level);
4781 int h= height >> (s.spatial_decomposition_count-level);
4782 int stride= width << (s.spatial_decomposition_count-level);
4783 DWTELEM *buf= buffer[0];
4786 if(orientation&1) buf+=w;
4787 if(orientation>1) buf+=stride>>1;
4789 memset(buffer[0], 0, sizeof(int)*width*height);
4790 buf[w/2 + h/2*stride]= 256*256;
4791 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4792 for(y=0; y<height; y++){
4793 for(x=0; x<width; x++){
4794 int64_t d= buffer[0][x + y*width];
4796 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4798 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4800 error= (int)(sqrt(error)+0.5);
4801 errors[level][orientation]= error;
4802 if(g) g=av_gcd(g, error);
4806 printf("static int const visual_weight[][4]={\n");
4807 for(level=0; level<s.spatial_decomposition_count; level++){
4809 for(orientation=0; orientation<4; orientation++){
4810 printf("%8"PRId64",", errors[level][orientation]/g);
4817 int w= width >> (s.spatial_decomposition_count-level);
4818 //int h= height >> (s.spatial_decomposition_count-level);
4819 int stride= width << (s.spatial_decomposition_count-level);
4820 DWTELEM *buf= buffer[0];
4826 memset(buffer[0], 0, sizeof(int)*width*height);
4828 for(y=0; y<height; y++){
4829 for(x=0; x<width; x++){
4830 int tab[4]={0,2,3,1};
4831 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4834 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4838 buf[x + y*stride ]=169;
4839 buf[x + y*stride-w]=64;
4842 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4844 for(y=0; y<height; y++){
4845 for(x=0; x<width; x++){
4846 int64_t d= buffer[0][x + y*width];
4848 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4850 if(FFABS(height/2-y)<9) printf("\n");