2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/* quant3: 256-entry int8_t lookup table with values in {-1, 0, 1}.
 * Indices 0 and 1 map to 0, 2..127 to 1, 128..254 to -1 and 255 to 0.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* quant3b: 256-entry int8_t lookup table with values in {-1, 0, 1}.
 * Index 0 maps to 0, 1..127 to 1 and 128..255 to -1.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* quant3bA: 256-entry int8_t lookup table with values in {-1, 0, 1}.
 * Indices 0 and 1 map to 0; from index 2 on, even indices map to 1 and
 * odd indices map to -1.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* quant5: 256-entry int8_t lookup table with values in -2..2.
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative ones.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* quant7: 256-entry int8_t lookup table with values in -3..3.
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative ones.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* quant9: 256-entry int8_t lookup table with values in -4..4.
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative ones.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* quant11: 256-entry int8_t lookup table with values in -5..5.
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative ones.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* quant13: 256-entry int8_t lookup table with values in -6..6.
 * Indices 0..127 hold the non-negative levels, indices 128..255 the negative ones.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* obmc32: 1024 uint8_t entries laid out below as 32 rows of 32 values.
 * Entries range from 0 at the border rows to 64 in the two middle rows.
 * NOTE(review): obmc32 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the first of them - confirm against the full file. The closing "};"
 * is also absent here. */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* obmc16: 256 uint8_t entries laid out below as 16 rows of 16 values,
 * ranging from 0 at the corners to 62 in the middle.
 * NOTE(review): obmc16 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the first of them - confirm. The closing "};" is also absent here. */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* obmc32: 1024 uint8_t entries laid out below as 32 rows of 32 values.
 * Entries range from 0 at the corners to 240 in the two middle rows.
 * NOTE(review): obmc32 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the second of them - confirm against the full file. The closing "};"
 * is also absent here. */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* obmc16: 256 uint8_t entries laid out below as 16 rows of 16 values,
 * ranging from 0 at the corners to 224 in the middle.
 * NOTE(review): obmc16 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the second of them - confirm. The closing "};" is also absent here. */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* obmc32: 1024 uint8_t entries laid out below as 32 rows of 32 values.
 * Entries range from 0 at the border rows to 64 in the two middle rows.
 * NOTE(review): obmc32 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the third of them - confirm against the full file. The closing "};"
 * is also absent here. */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* obmc16: 256 uint8_t entries laid out below as 16 rows of 16 values,
 * ranging from 0 at the corners to 63 in the middle.
 * NOTE(review): obmc16 is defined three times in this listing; presumably the
 * variants are selected by preprocessor conditionals that are not shown, and
 * this is the third of them - confirm. The closing "};" is also absent here. */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* obmc8: 64 uint8_t entries laid out below as 8 rows of 8 values,
 * ranging from 4 at the corners to 196 in the middle.
 * NOTE(review): the closing "};" of the initializer is not present in this listing. */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
/* obmc4: 16-entry uint8_t table.
 * NOTE(review): only the declaration line survives in this listing; the
 * initializer values and closing "};" must be taken from the full file. */
358 static const uint8_t obmc4[16]={
/* obmc_tab: the four obmc tables in order of decreasing size, so index 0 is
 * obmc32 and index 3 is obmc4.
 * NOTE(review): the closing "};" is not present in this listing. */
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
/* scale_mv_ref: MAX_REF_FRAMES x MAX_REF_FRAMES table of ints with static
 * storage (zero-initialized at program start); it is filled in elsewhere.
 * Presumably holds motion-vector scale factors between pairs of reference
 * frames - confirm at the point of use. */
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* BlockNode: per-block record.
 * NOTE(review): most members and the closing "}BlockNode;" are missing from
 * this listing; only the BLOCK_INTRA flag value and the level member are
 * visible. null_block initializes a .color member with three values, so a
 * three-element color field presumably belongs here too - confirm. */
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
/* Flag value 1; presumably stored in a type member that is not visible here. */
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
/* null_block: constant BlockNode whose color is set to 128 in all three
 * components.
 * NOTE(review): any further designated initializers and the closing "};" are
 * missing from this listing. */
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
/* Block-size constants: MB_SIZE is derived from its log2 and evaluates to 16. */
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): the use of ENCODER_EXTRA_BITS is not visible in this listing;
 * presumably extra precision bits on the encoder side - confirm. */
396 #define ENCODER_EXTRA_BITS 4
/* x_and_coeff: NOTE(review): only the opening line of this struct survives in
 * this listing; its members must be taken from the full file. SubBand keeps a
 * pointer to this type in its x_coeff member. */
398 typedef struct x_and_coeff{
/* SubBand: state for one subband.
 * NOTE(review): several members and the closing "}SubBand;" are missing from
 * this listing; only the members below are visible. */
403 typedef struct SubBand{
408 int qlog; ///< log(qscale)/log[2^(1/6)]
413 int stride_line; ///< Stride measured in lines, not pixels.
/* Pointer to x_and_coeff records for this band. */
414 x_and_coeff * x_coeff;
/* Link to another SubBand; presumably the same band one decomposition level
 * up - confirm where it is assigned. */
415 struct SubBand *parent;
/* 7 + 512 groups of 32 one-byte states; the shape matches the 32-byte state
 * blocks that put_symbol()/get_symbol() index at offsets 0..31. */
416 uint8_t state[/*7*2*/ 7 + 512][32];
/* Plane: per-plane data; holds four SubBand entries for each of up to
 * MAX_DECOMPOSITIONS levels.
 * NOTE(review): other members and the closing "}Plane;" are missing from this
 * listing. */
419 typedef struct Plane{
422 SubBand band[MAX_DECOMPOSITIONS][4];
/* SnowContext: top-level codec state.
 * NOTE(review): a number of members and the closing "}SnowContext;" are
 * missing from this listing; comments below cover only the visible members. */
425 typedef struct SnowContext{
426 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
428 AVCodecContext *avctx;
432 AVFrame input_picture; ///< new_picture with the internal linesizes
433 AVFrame current_picture;
/* One frame slot per reference, up to MAX_REF_FRAMES. */
434 AVFrame last_picture[MAX_REF_FRAMES];
435 AVFrame mconly_picture;
436 // uint8_t q_context[16];
/* Adaptive range-coder states; header_state is 32 bytes, the size that
 * put_symbol()/get_symbol() index into. */
437 uint8_t header_state[32];
438 uint8_t block_state[128 + 32*128];
442 int spatial_decomposition_type;
443 int last_spatial_decomposition_type;
444 int temporal_decomposition_type;
445 int spatial_decomposition_count;
446 int temporal_decomposition_count;
/* Per reference frame: pointer to an array of 2-element int16_t vectors. */
449 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
450 uint32_t *ref_scores[MAX_REF_FRAMES];
/* Work buffers, one of DWTELEM and one of IDWTELEM elements. */
451 DWTELEM *spatial_dwt_buffer;
452 IDWTELEM *spatial_idwt_buffer;
456 int spatial_scalability;
466 #define QBIAS_SHIFT 3
470 int last_block_max_depth;
471 Plane plane[MAX_PLANES];
/* Fixed-size cache of ME_CACHE_SIZE ints plus a generation counter; presumably
 * the generation is used to invalidate old entries - confirm at the use site. */
473 #define ME_CACHE_SIZE 1024
474 int me_cache[ME_CACHE_SIZE];
475 int me_cache_generation;
478 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* slice_buffer_get_line: yields the buffer for line_num, using the entry
 * already stored in slice_buf->line[] when it is non-NULL and calling
 * slice_buffer_load_line() otherwise.
 * Both arguments are evaluated more than once, so they must be free of side
 * effects. */
489 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
490 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not part of this listing. */
492 static void iterative_me(SnowContext *s);
/* slice_buffer_init: set up a slice_buffer.
 *
 * buf                  structure to initialize
 * line_count           number of entries in the zero-filled buf->line table
 * max_allocated_lines  number of line buffers allocated up front and placed
 *                      on buf->data_stack
 * line_width           number of IDWTELEM elements in each line buffer
 * base_buffer          stored in buf->base_buffer (not otherwise used here)
 *
 * After the visible statements every line slot is NULL and data_stack_top
 * indexes the last pre-allocated buffer.
 * NOTE(review): none of the av_mallocz()/av_malloc() results are checked in
 * the visible lines, and the function returns void, so an allocation failure
 * cannot be reported to the caller. The declaration of i and the braces are
 * missing from this listing. */
494 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
498 buf->base_buffer = base_buffer;
499 buf->line_count = line_count;
500 buf->line_width = line_width;
501 buf->data_count = max_allocated_lines;
502 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
503 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
505 for (i = 0; i < max_allocated_lines; i++)
507 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
/* All pre-allocated buffers start out free, so the stack is full. */
510 buf->data_stack_top = max_allocated_lines - 1;
/* slice_buffer_load_line: attach a free buffer from buf->data_stack to the
 * given line: the top-of-stack buffer is taken, data_stack_top is decremented
 * and the buffer is stored in buf->line[line].
 * NOTE(review): this listing has gaps. The "return buf->line[line];" below
 * appears before the pop sequence, so it presumably sits in a conditional or
 * disabled early-out that is not shown, and the final return statement is
 * missing - confirm against the full file. offset is computed but not used in
 * any visible line. */
513 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
518 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
/* At least one free buffer must remain on the stack. */
520 assert(buf->data_stack_top >= 0);
521 // assert(!buf->line[line]);
523 return buf->line[line];
525 offset = buf->line_width * line;
526 buffer = buf->data_stack[buf->data_stack_top];
527 buf->data_stack_top--;
528 buf->line[line] = buffer;
530 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/* slice_buffer_release: detach the buffer currently attached to a line and
 * push it back on buf->data_stack, leaving buf->line[line] NULL.
 * The asserts require line to be in [0, line_count) and to have a buffer
 * attached.
 * NOTE(review): offset is computed but not used in any visible line; local
 * declarations and braces are missing from this listing. */
535 static void slice_buffer_release(slice_buffer * buf, int line)
540 assert(line >= 0 && line < buf->line_count);
541 assert(buf->line[line]);
543 offset = buf->line_width * line;
544 buffer = buf->line[line];
545 buf->data_stack_top++;
546 buf->data_stack[buf->data_stack_top] = buffer;
547 buf->line[line] = NULL;
549 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/* slice_buffer_flush: walk all line_count line slots and hand their buffers
 * back via slice_buffer_release().
 * NOTE(review): slice_buffer_release() asserts that the slot is non-NULL, so a
 * guard on buf->line[i] is presumably on a line missing from this listing -
 * confirm. The declaration of i and the braces are also missing. */
552 static void slice_buffer_flush(slice_buffer * buf)
555 for (i = 0; i < buf->line_count; i++)
559 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
560 slice_buffer_release(buf, i);
/* slice_buffer_destroy: release everything slice_buffer_init() allocated.
 * The buffer is flushed first so that every line buffer is back on
 * data_stack; then each of the data_count buffers, the stack array and the
 * line table are freed with av_freep().
 * NOTE(review): the declaration of i and the braces are missing from this
 * listing. */
565 static void slice_buffer_destroy(slice_buffer * buf)
568 slice_buffer_flush(buf);
570 for (i = buf->data_count - 1; i >= 0; i--)
572 assert(buf->data_stack[i]);
573 av_freep(&buf->data_stack[i]);
575 assert(buf->data_stack);
576 av_freep(&buf->data_stack);
578 av_freep(&buf->line);
582 // Avoid a name clash on SGI IRIX
// NOTE(review): the statement the comment above refers to is not present in
// this listing (presumably an #undef) - confirm against the full file.
585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* qexp: QROOT one-byte entries; filled in elsewhere (no initializer here). */
586 static uint8_t qexp[QROOT];
/* mirror: map index v into the range [0, m].
 * The loop runs while v lies outside [0, m]; the unsigned comparison makes a
 * negative v compare as a large value, so it also fails the range test.
 * NOTE(review): the loop body that folds v back into range and the return
 * statement are missing from this listing - take them from the full file. */
588 static inline int mirror(int v, int m){
589 while((unsigned)v > (unsigned)m){
/* put_symbol: write integer v to range coder c using the adaptive bit
 * contexts in state.
 *
 * Context offsets, as given by the inline comments on the visible lines:
 *   state+0        first bit written (0 before a coded value, 1 in the final
 *                  visible call; presumably a "v is zero" flag - confirm)
 *   state+1..10    run of bits related to e = av_log2(|v|)
 *   state+11..21   sign bit (v < 0)
 *   state+22..31   bits of |v| below the leading one, highest first
 *
 * NOTE(review): this listing has gaps (conditions, loop headers, braces), and
 * three similar coding sequences are visible one after another. Presumably
 * they are alternative implementations selected by preprocessor conditionals
 * that are not shown, and the sign bit is presumably guarded by is_signed as
 * in get_symbol(). Confirm both against the full file before relying on this
 * summary. */
596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
600 const int a= FFABS(v);
601 const int e= av_log2(a);
/* First visible variant: exponent contexts capped via el = min(e, 10). */
603 const int el= FFMIN(e, 10);
604 put_rac(c, state+0, 0);
607 put_rac(c, state+1+i, 1); //1..10
610 put_rac(c, state+1+9, 1); //1..10
612 put_rac(c, state+1+FFMIN(i,9), 0);
614 for(i=e-1; i>=el; i--){
615 put_rac(c, state+22+9, (a>>i)&1); //22..31
618 put_rac(c, state+22+i, (a>>i)&1); //22..31
622 put_rac(c, state+11 + el, v < 0); //11..21
/* Second visible variant: contexts indexed directly by i and e, uncapped. */
625 put_rac(c, state+0, 0);
628 put_rac(c, state+1+i, 1); //1..10
630 put_rac(c, state+1+i, 0);
632 for(i=e-1; i>=0; i--){
633 put_rac(c, state+22+i, (a>>i)&1); //22..31
637 put_rac(c, state+11 + e, v < 0); //11..21
/* Third visible variant: every context index clamped with FFMIN, matching the
 * indices get_symbol() reads. */
640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
642 put_rac(c, state+1+FFMIN(i,9), 0);
644 for(i=e-1; i>=0; i--){
645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
653 put_rac(c, state+0, 1);
/* get_symbol: read an integer from range coder c using the adaptive bit
 * contexts in state; the context indices mirror the FFMIN-clamped variant of
 * put_symbol().
 *
 * Visible steps: test the bit at state+0; count 1-bits at state+1+min(e,9);
 * accumulate e further bits into a, highest first, from state+22+min(i,9);
 * when is_signed is set, read a sign bit from state+11+min(e,10).
 * NOTE(review): the statement taken when the state+0 bit is set, the
 * initialization of e and a, the loop bodies and the return statements are
 * missing from this listing - confirm against the full file. */
657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658 if(get_rac(c, state+0))
663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
668 for(i=e-1; i>=0; i--){
669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/* put_symbol2: write v to range coder c using contexts state+4+log2 for the
 * 1/0 prefix bits and state+31-i for bit i of v.
 *
 * r starts at 1<<log2 (or 1 when log2 is negative). The final loop writes
 * bits log2-1 down to 0 of v, highest first.
 * NOTE(review): the loop that emits the 1-bits (and presumably updates v,
 * log2 and r) is missing from this listing, as are the local declarations and
 * braces - confirm against the full file. */
679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
681 int r= log2>=0 ? 1<<log2 : 1;
687 put_rac(c, state+4+log2, 1);
692 put_rac(c, state+4+log2, 0);
694 for(i=log2-1; i>=0; i--){
695 put_rac(c, state+31-i, (v>>i)&1);
/* get_symbol2: read a value written by put_symbol2() from range coder c.
 *
 * Visible steps: r starts at 1<<log2 (or 1 when log2 is negative); a loop
 * consumes 1-bits at context state+4+log2; then bits log2-1 down to 0 are
 * read from state+31-i and added into v at their bit position.
 * NOTE(review): the body of the while loop, the declaration/initialization of
 * v and i, and the return statement are missing from this listing. */
699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
701 int r= log2>=0 ? 1<<log2 : 1;
706 while(get_rac(c, state+4+log2)){
712 for(i=log2-1; i>=0; i--){
713 v+= get_rac(c, state+31-i)<<i;
/* lift: lifting pass over DWTELEM samples.
 * For each output index the visible code computes
 *     dst = src +/- ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * with the sign chosen by inverse (subtract when inverse is non-zero). The
 * first and last outputs use 2*ref[0] and 2*ref[w] in place of the
 * two-neighbour sum.
 *
 * dst_step/src_step/ref_step  element strides of the three arrays
 * width                       length of the full row being split
 * highpass                    selects which half the outputs belong to; it
 *                             determines mirror_left, mirror_right and w
 *
 * NOTE(review): the loop header and the conditions on the edge statements are
 * missing from this listing; presumably the dst[0] line is guarded by
 * mirror_left and the dst[w] line by mirror_right - confirm. */
719 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
720 const int mirror_left= !highpass;
721 const int mirror_right= (width&1) ^ highpass;
722 const int w= (width>>1) - 1 + (highpass & width);
725 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
727 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
733 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
737 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/* inv_lift: same computation as lift(), operating on IDWTELEM samples.
 * For each output index the visible code computes
 *     dst = src +/- ((mul * (ref[i] + ref[i+1]) + add) >> shift)
 * with the sign chosen by inverse (subtract when inverse is non-zero). The
 * first and last outputs use 2*ref[0] and 2*ref[w] in place of the
 * two-neighbour sum.
 *
 * NOTE(review): LIFT is defined here with the same replacement text as in
 * lift(). The loop header and the conditions on the edge statements are
 * missing from this listing; presumably the dst[0] line is guarded by
 * mirror_left and the dst[w] line by mirror_right - confirm. */
741 static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
742 const int mirror_left= !highpass;
743 const int mirror_right= (width&1) ^ highpass;
744 const int w= (width>>1) - 1 + (highpass & width);
747 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
749 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
755 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
759 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/* liftS: lifting pass over DWTELEM samples using the LIFTS formula.
 * The value passed as ref to LIFTS is mul*(ref[i] + ref[i+1]) + add
 * (mul*2*ref at the two edges). LIFTS then yields
 *   inverse != 0:  src + ((ref + 4*src) >> shift)
 *   inverse == 0:  -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23))
 * Note that LIFTS reads shift and add from the enclosing function, not from
 * its macro arguments.
 *
 * NOTE(review): the loop header and the conditions on the edge statements are
 * missing from this listing; presumably the dst[0] line is guarded by
 * mirror_left and the dst[w] line by mirror_right. The large constants look
 * like a bias that keeps the dividend positive before the division - confirm. */
764 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
765 const int mirror_left= !highpass;
766 const int mirror_right= (width&1) ^ highpass;
767 const int w= (width>>1) - 1 + (highpass & width);
771 #define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
773 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
779 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
783 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* inv_liftS: same computation as liftS(), operating on IDWTELEM samples.
 * The value passed as ref to LIFTS is mul*(ref[i] + ref[i+1]) + add
 * (mul*2*ref at the two edges). LIFTS then yields
 *   inverse != 0:  src + ((ref + 4*src) >> shift)
 *   inverse == 0:  -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23))
 * Note that LIFTS reads shift and add from the enclosing function, not from
 * its macro arguments.
 *
 * NOTE(review): LIFTS is defined here with the same replacement text as in
 * liftS(). The loop header and the conditions on the edge statements are
 * missing from this listing; presumably the dst[0] line is guarded by
 * mirror_left and the dst[w] line by mirror_right - confirm. */
786 static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
787 const int mirror_left= !highpass;
788 const int mirror_right= (width&1) ^ highpass;
789 const int w= (width>>1) - 1 + (highpass & width);
793 #define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
795 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
801 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
805 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* horizontal_decompose53i: in-place horizontal split of one row b of the given
 * width. The visible code copies the odd-indexed samples of b into the upper
 * part of a temp buffer (starting at w2 = ceil(width/2)), then runs two lift()
 * passes that write the result back into b: b+w2 first, then b from offset 0.
 *
 * NOTE(review): this listing has gaps. The declaration of temp, the copy of
 * the even-indexed samples and the end-of-row handling are not visible. The
 * A1..A4 sequence in the middle looks like a separate hand-unrolled variant,
 * presumably disabled by preprocessor conditionals that are not shown -
 * confirm against the full file. */
810 static void horizontal_decompose53i(DWTELEM *b, int width){
812 const int width2= width>>1;
814 const int w2= (width+1)>>1;
816 for(x=0; x<width2; x++){
818 temp[x+w2]= b[2*x + 1];
832 for(x=1; x+1<width2; x+=2){
836 A2 += (A1 + A3 + 2)>>2;
840 A1= temp[x+1+width2];
843 A4 += (A1 + A3 + 2)>>2;
849 A2 += (A1 + A3 + 2)>>2;
/* Upper half: mul=-1, add=0, shift=1, so b[w2+i] = temp[w2+i] - ((temp[i] + temp[i+1]) >> 1). */
854 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
/* Lower half: mul=1, add=2, shift=2, so b[i] = temp[i] + ((b[w2+i] + b[w2+i+1] + 2) >> 2). */
855 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/* vertical_decompose53iH0: for each of the width columns, subtract from row b1
 * the floor-shifted sum (b0 + b2) >> 1 of the rows on either side; b0 and b2
 * are only read.
 * NOTE(review): the declaration of i and the closing braces are missing from
 * this listing. */
859 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
862 for(i=0; i<width; i++){
863 b1[i] -= (b0[i] + b2[i])>>1;
/* vertical_decompose53iL0: for each of the width columns, add to row b1 the
 * rounded quarter (b0 + b2 + 2) >> 2 of the rows on either side; b0 and b2 are
 * only read.
 * NOTE(review): the declaration of i and the closing braces are missing from
 * this listing. */
867 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
870 for(i=0; i<width; i++){
871 b1[i] += (b0[i] + b2[i] + 2)>>2;
/* spatial_decompose53i: one decomposition level over a width x height image
 * stored in buffer with the given row stride.
 *
 * Rows are visited two at a time starting from y = -2; out-of-range row
 * indices are folded back into the image with mirror(). For each step the two
 * newly reached rows (y+1, y+2) get the horizontal pass and the vertical
 * H0/L0 passes are then applied to the trailing rows.
 * The "index < (unsigned)height" tests convert the left side to unsigned, so
 * negative row indices fail the test as well as indices >= height.
 *
 * NOTE(review): the declaration of y, the START_TIMER halves of the timing
 * macros, the end-of-iteration update of b0/b1 and the closing braces are
 * missing from this listing - confirm against the full file. */
875 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
877 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
878 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
880 for(y=-2; y<height; y+=2){
881 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
882 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
885 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
886 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
887 STOP_TIMER("horizontal_decompose53i")}
890 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
891 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
892 STOP_TIMER("vertical_decompose53i*")}
/**
 * Horizontal forward 9/7 step on one row `b` of `width` samples, expressed
 * as four lifting calls parameterised by the W_A*, W_B*, W_C* and W_D*
 * constants. The first two calls write into temp[], the last two write the
 * result back into b (second half at b+w2, first half at b).
 */
899 static void horizontal_decompose97i(DWTELEM *b, int width){
/* w2 = ceil(width/2): offset of the second half inside b and temp */
901 const int w2= (width+1)>>1;
/* NOTE(review): lift()/liftS() prototypes are not visible here; the step
 * arguments of 2 presumably read b in interleaved order -- confirm. */
903 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
904 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
905 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
906 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * Vertical forward 9/7 step with the W_A* constants: for every column i,
 * subtracts (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS from b1[i].
 */
910 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
913 for(i=0; i<width; i++){
914 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical forward 9/7 step with the W_C* constants: for every column i,
 * adds (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS to b1[i].
 */
918 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
921 for(i=0; i<width; i++){
922 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical forward 9/7 step with the W_B* constants, modifying b1 only.
 * Two alternative update statements appear below; the preprocessor condition
 * that selects between them is on lines not visible in this excerpt.
 */
926 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
929 for(i=0; i<width; i++){
/* variant 1: plain shift-based update */
931 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* variant 2: division by 5*16 with bias (5<<27); (5<<27)/(5*16) == 1<<23,
 * so the trailing "- (1<<23)" removes the bias again */
933 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * Vertical forward 9/7 step with the W_D* constants: for every column i,
 * adds (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS to b1[i].
 */
938 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
941 for(i=0; i<width; i++){
942 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * One level of the forward 9/7 transform over a width x height area of
 * `buffer` whose rows are `stride` elements apart.
 * Six row pointers (b0..b5) are used per iteration; row indices outside the
 * picture are folded back through mirror().
 */
946 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* rows for indices -5, -4, -3 and -2 (mirrored into the picture) */
948 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
949 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
950 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
951 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
953 for(y=-4; y<height; y+=2){
954 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
955 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* unsigned comparison: negative row indices fail the test as well */
958 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
959 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
961 STOP_TIMER("horizontal_decompose97i")
/* four vertical steps, each centred one row earlier than the previous one */
965 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
966 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
967 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
968 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
971 STOP_TIMER("vertical_decompose97i")
/**
 * Forward wavelet transform entry point: runs `decomposition_count` levels,
 * starting at level 0. At each level the width and height are shifted right
 * by the level and the stride shifted left by it, and the call is dispatched
 * on DWT_97 / DWT_53 (the selecting statement is not visible in this
 * excerpt; presumably it switches on `type`).
 */
981 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
984 for(level=0; level<decomposition_count; level++){
986 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
987 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Horizontal inverse 5/3 step on one row `b` of `width` samples.
 * The visible code runs two inv_lift() calls from b into the scratch array
 * temp[] and then writes temp[x+w2] back to the odd positions of b.
 */
992 static void horizontal_compose53i(IDWTELEM *b, int width){
/* scratch row; a variable-length array sized by the caller-supplied width */
993 IDWTELEM temp[width];
/* width2 = floor(width/2), w2 = ceil(width/2) */
994 const int width2= width>>1;
995 const int w2= (width+1)>>1;
/* NOTE(review): the declarations of A1..A4 and the lines around this loop are
 * not visible in this excerpt; it may be an alternative hand-unrolled code
 * path -- confirm whether it is compiled in. */
1007 for(x=1; x+1<width2; x+=2){
1011 A2 += (A1 + A3 + 2)>>2;
1015 A1= temp[x+1+width2];
1018 A4 += (A1 + A3 + 2)>>2;
1024 A2 += (A1 + A3 + 2)>>2;
/* same constants as the two lift() calls in horizontal_decompose53i(),
 * applied in the opposite order */
1028 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1029 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* re-interleave: second half of temp goes to the odd positions of b */
1031 for(x=0; x<width2; x++){
1033 b[2*x + 1]= temp[x+w2];
/**
 * Vertical inverse 5/3 step: for every column i, adds (b0[i] + b2[i]) >> 1
 * to b1[i], i.e. the opposite sign of vertical_decompose53iH0().
 */
1039 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1042 for(i=0; i<width; i++){
1043 b1[i] += (b0[i] + b2[i])>>1;
/**
 * Vertical inverse 5/3 step: for every column i, subtracts
 * (b0[i] + b2[i] + 2) >> 2 from b1[i], i.e. the opposite sign of
 * vertical_decompose53iL0().
 */
1047 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1050 for(i=0; i<width; i++){
1051 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Prepares the 5/3 compose state for the slice-buffer variant: cs->b0 and
 * cs->b1 are set to the lines for row indices -2 and -1, folded into the
 * picture by mirror() and fetched with slice_buffer_get_line().
 * Further initialisation (if any) is on lines not visible in this excerpt.
 */
1055 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1056 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1057 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Prepares the 5/3 compose state for the flat-buffer variant: cs->b0 and
 * cs->b1 point at the rows for indices -2 and -1, folded into the picture
 * by mirror(). Further initialisation (if any) is on lines not visible in
 * this excerpt.
 */
1061 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1062 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1063 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * Advances the inverse 5/3 transform (slice-buffer variant).
 * Uses the two rows remembered in cs (b0, b1) plus the rows y+1 and y+2
 * fetched from the slice buffer; vertical steps run first, then the
 * horizontal pass on b0 and b1. How y is obtained and how cs is updated is
 * on lines not visible in this excerpt.
 */
1067 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1070 IDWTELEM *b0= cs->b0;
1071 IDWTELEM *b1= cs->b1;
1072 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1073 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* unsigned comparison: negative row indices fail the test as well */
1076 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1077 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1078 STOP_TIMER("vertical_compose53i*")}
/* horizontal pass on the rows y-1 and y, guarded the same way */
1081 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1082 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1083 STOP_TIMER("horizontal_compose53i")}
/**
 * Advances the inverse 5/3 transform (flat-buffer variant).
 * Same sequence of calls as spatial_compose53i_dy_buffered(), but the rows
 * y+1 and y+2 are addressed directly inside `buffer` using `stride`.
 * How y is obtained and how cs is updated is on lines not visible here.
 */
1090 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1092 IDWTELEM *b0= cs->b0;
1093 IDWTELEM *b1= cs->b1;
1094 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* unsigned comparison: negative row indices fail the test as well */
1098 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1099 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1100 STOP_TIMER("vertical_compose53i*")}
/* horizontal pass on the rows y-1 and y, guarded the same way */
1103 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1104 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1105 STOP_TIMER("horizontal_compose53i")}
/**
 * Full inverse 5/3 transform of one level: initialises a compose state and
 * keeps calling spatial_compose53i_dy() while cs.y <= height.
 * NOTE(review): the loop relies on spatial_compose53i_dy() advancing cs.y;
 * that update is not visible in this excerpt.
 */
1112 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1114 spatial_compose53i_init(&cs, buffer, height, stride);
1115 while(cs.y <= height)
1116 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Horizontal inverse 9/7 step on one row `b` of `width` samples.
 * Uses the W_D*, W_C*, W_B*, W_A* constants in that order, i.e. the reverse
 * of the order used by horizontal_decompose97i(). The first two calls write
 * into the scratch array temp[], the last two write b with a destination
 * step of 2 (even positions via b, odd positions via b+1).
 */
1120 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
/* scratch row; a variable-length array sized by the caller-supplied width */
1121 IDWTELEM temp[width];
1122 const int w2= (width+1)>>1;
1124 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1125 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1126 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1127 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/**
 * Vertical inverse 9/7 step with the W_A* constants: for every column i,
 * adds (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS to b1[i].
 */
1130 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1133 for(i=0; i<width; i++){
1134 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical inverse 9/7 step with the W_C* constants: for every column i,
 * subtracts (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS from b1[i].
 */
1138 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1141 for(i=0; i<width; i++){
1142 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical inverse 9/7 step with the W_B* constants, modifying b1 only.
 * Two alternative update statements appear below; the preprocessor condition
 * that selects between them is on lines not visible in this excerpt.
 */
1146 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1149 for(i=0; i<width; i++){
/* variant 1: neighbours only */
1151 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* variant 2: additionally feeds 4*b1[i] into the shifted term */
1153 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * Vertical inverse 9/7 step with the W_D* constants: for every column i,
 * subtracts (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS from b1[i].
 */
1158 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1161 for(i=0; i<width; i++){
1162 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused vertical inverse 9/7 step over six rows b0..b5.
 * For every column i it applies, in this order, the same formulas as
 * vertical_compose97iL1 (on b4), vertical_compose97iH1 (on b3),
 * vertical_compose97iL0 (on b2) and vertical_compose97iH0 (on b1).
 * Each step reads the result of the one before it, so the order matters.
 */
1166 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1169 for(i=0; i<width; i++){
1170 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1171 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
/* two alternative forms of the W_B* step; the preprocessor condition that
 * selects between them is not visible in this excerpt */
1173 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1175 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1177 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Prepares the 9/7 compose state for the slice-buffer variant: cs->b0..b3
 * are set to the lines for row indices -4, -3, -2 and -1, folded into the
 * picture by mirror() and fetched with slice_buffer_get_line().
 * Further initialisation (if any) is on lines not visible in this excerpt.
 */
1181 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1182 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1183 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1184 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1185 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Prepares the 9/7 compose state for the flat-buffer variant: cs->b0..b3
 * point at the rows for indices -4, -3, -2 and -1, folded into the picture
 * by mirror(). Further initialisation (if any) is on lines not visible in
 * this excerpt.
 */
1189 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1190 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1191 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1192 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1193 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * Advances the inverse 9/7 transform (slice-buffer variant).
 * Uses the four rows remembered in cs (b0..b3) plus rows y+3 and y+4 from
 * the slice buffer. Vertical and horizontal work is dispatched through the
 * function pointers in `dsp` where shown. How y is obtained and how cs is
 * updated is on lines not visible in this excerpt.
 */
1197 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1200 IDWTELEM *b0= cs->b0;
1201 IDWTELEM *b1= cs->b1;
1202 IDWTELEM *b2= cs->b2;
1203 IDWTELEM *b3= cs->b3;
1204 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1205 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* away from the top and bottom borders all four vertical steps are valid,
 * so the fused routine is used */
1208 if(y>0 && y+4<height){
1209 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* individually guarded steps; presumably the else branch of the test above
 * (the "else" itself is not visible here) -- confirm */
1211 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1212 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1213 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1214 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1217 STOP_TIMER("vertical_compose97i")}}
/* horizontal pass on the rows y-1 and y; the unsigned comparison rejects
 * negative row indices as well */
1220 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1221 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/* this condition only gates the profiling timer below */
1222 if(width>400 && y+0<(unsigned)height){
1223 STOP_TIMER("horizontal_compose97i")}}
/**
 * Advances the inverse 9/7 transform (flat-buffer variant).
 * Uses the four rows remembered in cs (b0..b3) plus rows y+3 and y+4
 * addressed directly inside `buffer`. Four guarded vertical steps run first,
 * then the horizontal pass on b0 and b1. How y is obtained and how cs is
 * updated is on lines not visible in this excerpt.
 */
1232 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1234 IDWTELEM *b0= cs->b0;
1235 IDWTELEM *b1= cs->b1;
1236 IDWTELEM *b2= cs->b2;
1237 IDWTELEM *b3= cs->b3;
1238 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1239 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* unsigned comparison: negative row indices fail the test as well */
1242 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1243 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1244 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1245 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1247 STOP_TIMER("vertical_compose97i")}}
/* horizontal pass on the rows y-1 and y */
1250 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1251 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/* this condition only gates the profiling timer below */
1252 if(width>400 && b0 <= b2){
1253 STOP_TIMER("horizontal_compose97i")}}
/**
 * Full inverse 9/7 transform of one level: initialises a compose state and
 * keeps calling spatial_compose97i_dy() while cs.y <= height.
 * NOTE(review): the loop relies on spatial_compose97i_dy() advancing cs.y;
 * that update is not visible in this excerpt.
 */
1262 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1264 spatial_compose97i_init(&cs, buffer, height, stride);
1265 while(cs.y <= height)
1266 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffer inverse transform, from the highest level down to level 0.
 * Each level gets height>>level and stride_line<<level. The dispatch on
 * DWT_97 / DWT_53 is presumably a switch on `type` (not visible here).
 * `width` is not used by the visible lines.
 */
1269 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1271 for(level=decomposition_count-1; level>=0; level--){
1273 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1274 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/**
 * Initialises one compose state per decomposition level (cs[level]) for the
 * flat-buffer inverse transform, from the highest level down to level 0.
 * Each level gets height>>level and stride<<level. The dispatch on
 * DWT_97 / DWT_53 is presumably a switch on `type` (not visible here).
 * `width` is not used by the visible lines.
 */
1279 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1281 for(level=decomposition_count-1; level>=0; level--){
1283 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1284 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/**
 * Runs the flat-buffer inverse transform far enough to cover picture row y.
 * Levels are processed from the highest down to 0; at each level the
 * per-row compose function is called while that level's cs[level].y has not
 * passed min((y>>level)+support, height>>level).
 */
1289 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* 3 extra rows when type == 1, otherwise 5.
 * NOTE(review): 1 presumably equals DWT_53 -- confirm against its definition. */
1290 const int support = type==1 ? 3 : 5;
1294 for(level=decomposition_count-1; level>=0; level--){
1295 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1297 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1299 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Runs the slice-buffer inverse transform far enough to cover picture row y.
 * Same control structure as ff_spatial_idwt_slice(), but the per-row work is
 * done by the *_dy_buffered functions on `slice_buf`; the 9/7 variant also
 * receives `dsp`.
 */
1306 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* 3 extra rows when type == 1, otherwise 5.
 * NOTE(review): 1 presumably equals DWT_53 -- confirm against its definition. */
1307 const int support = type==1 ? 3 : 5;
1311 for(level=decomposition_count-1; level>=0; level--){
1312 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1314 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1316 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Complete inverse wavelet transform of `buffer`: initialises the per-level
 * compose states and then calls ff_spatial_idwt_slice() for y = 0, 4, 8, ...
 * below height.
 */
1323 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
/* one compose state per decomposition level */
1324 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1326 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1327 for(y=0; y<height; y+=4)
1328 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Encodes the coefficients of subband `b` (read from `src`, with `parent`
 * giving the parent band's coefficients) into the range coder s->c.
 * The visible code has two passes over the band: the first one collects run
 * lengths into runs[], the second one writes significance flags, run
 * lengths, magnitudes and signs. Many control-flow lines of this function
 * are not visible in this excerpt, so the notes below are limited to what
 * the shown statements do.
 */
1331 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1332 const int w= b->width;
1333 const int h= b->height;
/* pass 1: gather the neighbours left (l), top-left (lt), top (t),
 * top-right (rt) and the parent coefficient (p) of the current sample v */
1345 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1346 v= src[x + y*stride];
1349 t= src[x + (y-1)*stride];
1351 lt= src[x - 1 + (y-1)*stride];
1354 rt= src[x + 1 + (y-1)*stride];
1358 l= src[x - 1 + y*stride];
1360 if(orientation==1) ll= src[y + (x-2)*stride];
1361 else ll= src[x - 2 + y*stride];
1367 if(px<b->parent->width && py<b->parent->height)
1368 p= parent[px + py*2*stride];
/* positions whose neighbours and parent are all zero take part in the
 * run-length bookkeeping */
1370 if(!(/*ll|*/l|lt|t|rt|p)){
1372 runs[run_index++]= run;
1380 max_index= run_index;
1381 runs[run_index++]= run;
1383 run= runs[run_index++];
/* max_index is written with context b->state[30]; unpack_coeffs() reads the
 * run count back with the same context */
1385 put_symbol2(&s->c, b->state[30], max_index, 0);
1386 if(run_index <= max_index)
1387 put_symbol2(&s->c, b->state[1], run, 3);
/* fewer than w*40 bytes left in the output buffer is reported as an error */
1390 if(s->c.bytestream_end - s->c.bytestream < w*40){
1391 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* pass 2: same neighbour gathering as in pass 1 */
1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1397 v= src[x + y*stride];
1400 t= src[x + (y-1)*stride];
1402 lt= src[x - 1 + (y-1)*stride];
1405 rt= src[x + 1 + (y-1)*stride];
1409 l= src[x - 1 + y*stride];
1411 if(orientation==1) ll= src[y + (x-2)*stride];
1412 else ll= src[x - 2 + y*stride];
1418 if(px<b->parent->width && py<b->parent->height)
1419 p= parent[px + py*2*stride];
/* non-zero neighbourhood: code whether v is non-zero, with a context derived
 * from the weighted neighbour magnitudes */
1421 if(/*ll|*/l|lt|t|rt|p){
1422 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1424 put_rac(&s->c, &b->state[0][context], !!v);
1427 run= runs[run_index++];
1429 if(run_index <= max_index)
1430 put_symbol2(&s->c, b->state[1], run, 3);
/* magnitude (|v|-1) and sign of v; the sign context comes from the quantised
 * left and top neighbours, where l2/t2 pack 2*|value| plus a sign bit */
1438 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1439 int l2= 2*FFABS(l) + (l<0);
1440 int t2= 2*FFABS(t) + (t<0);
1442 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1443 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Encodes one subband by forwarding all arguments to encode_subband_c0run()
 * and returning its result. The other coders named in the commented-out
 * lines are not called.
 */
1451 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1452 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1453 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1454 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1455 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Decodes the coefficients of subband `b` from the range coder s->c into the
 * sparse list b->x_coeff, whose entries hold a column (x) and a coefficient.
 * `parent` may be NULL; if it is not, its x_coeff list is walked alongside
 * to supply the parent coefficient p. Many control-flow lines of this
 * function are not visible in this excerpt.
 */
1458 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1459 const int w= b->width;
1460 const int h= b->height;
/* xc: write cursor; prev_xc / prev2_xc: cursors into entries written earlier;
 * parent_xc: read cursor into the parent band's list */
1465 x_and_coeff *xc= b->x_coeff;
1466 x_and_coeff *prev_xc= NULL;
1467 x_and_coeff *prev2_xc= xc;
1468 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1469 x_and_coeff *prev_parent_xc= parent_xc;
/* number of runs, then the first run length (same contexts as used by
 * encode_subband_c0run(): b->state[30] and b->state[1]) */
1471 runs= get_symbol2(&s->c, b->state[30], 0);
1472 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1477 int lt=0, t=0, rt=0;
1479 if(y && prev_xc->x == 0){
1491 if(prev_xc->x == x + 1)
1497 if(x>>1 > parent_xc->x){
/* the parent band has half the horizontal resolution, hence x>>1 */
1500 if(x>>1 == parent_xc->x){
1501 p= parent_xc->coeff;
/* non-zero neighbourhood: read a significance flag, then magnitude and sign.
 * The decoded value is stored as 2*magnitude with the sign flag in bit 0,
 * which is why the context below uses >>1 and &~1 on the neighbours. */
1504 if(/*ll|*/l|lt|t|rt|p){
1505 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1507 v=get_rac(&s->c, &b->state[0][context]);
1509 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1510 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* zero neighbourhood: next run length, then a coefficient coded with the
 * fixed contexts that correspond to context == 0 */
1517 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1519 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1520 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* limit how far the current run may be skipped in one go, based on the next
 * entry of the previous row, the row end and the parent list */
1529 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1530 else max_run= FFMIN(run, w-x-1);
1532 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1538 (xc++)->x= w+1; //end marker
/* parent->width+1 is the end marker value used in the parent's list */
1544 while(parent_xc->x != parent->width+1)
1547 prev_parent_xc= parent_xc;
1549 parent_xc= prev_parent_xc;
1554 (xc++)->x= w+1; //end marker
/**
 * Dequantises the rows start_y .. h-1 of subband `b` from its sparse
 * coefficient list b->x_coeff into the slice buffer `sb`.
 * save_state[0] carries the position inside b->x_coeff from one call to the
 * next, so the band can be processed slice by slice.
 */
1558 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1559 const int w= b->width;
/* quantiser: combined qlog clipped to [0, QROOT*16], turned into a
 * multiplier (qmul) and a rounding offset (qadd) */
1561 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1562 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1563 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1568 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1573 /* If we are on the second or later slice, restore our index. */
1575 new_index = save_state[0];
1578 for(y=start_y; y<h; y++){
/* clear the destination line, then fill in the non-zero coefficients */
1581 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1582 memset(line, 0, b->width*sizeof(IDWTELEM));
1583 v = b->x_coeff[new_index].coeff;
1584 x = b->x_coeff[new_index++].x;
/* v>>1 is the magnitude, v&1 the sign flag. With u == -1 the expression
 * (t^u) - u equals -t, with u == 0 it equals t, so the sign is applied
 * without a branch. */
1587 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1588 register int u= -(v&1);
1589 line[x] = (t^u) - u;
1591 v = b->x_coeff[new_index].coeff;
1592 x = b->x_coeff[new_index++].x;
/* this condition only gates the profiling timer below */
1595 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1596 STOP_TIMER("decode_subband")
1599 /* Save our variables for the next slice. */
1600 save_state[0] = new_index;
/**
 * Resets all adaptive coder states to MID_STATE: the per-band state arrays
 * of every plane, level and orientation, plus s->header_state and
 * s->block_state.
 */
1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1606 int plane_index, level, orientation;
1608 for(plane_index=0; plane_index<3; plane_index++){
1609 for(level=0; level<s->spatial_decomposition_count; level++){
/* orientation 0 is only visited at level 0; higher levels start at 1 */
1610 for(orientation=level ? 1:0; orientation<4; orientation++){
1611 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1616 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocates the zero-initialised block array s->block.
 * Its size is w*h BlockNode entries, scaled by 4^block_max_depth through
 * the shift by block_max_depth*2.
 * NOTE(review): no check of the av_mallocz() result is visible in this
 * excerpt -- confirm it is handled on the lines not shown.
 */
1619 static int alloc_blocks(SnowContext *s){
/* picture size divided by (1<<LOG2_MB_SIZE), rounded up. The
 * negate/shift/negate idiom relies on >> of a negative int rounding toward
 * minus infinity, which is implementation-defined in C. */
1620 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1621 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Copies range coder state from s to d while keeping d's own bytestream and
 * bytestream_start pointers: both are saved first and written back at the
 * end. The statement between save and restore is not visible in this
 * excerpt; presumably it copies *s into *d -- confirm.
 */
1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631 uint8_t *bytestream= d->bytestream;
1632 uint8_t *bytestream_start= d->bytestream_start;
1634 d->bytestream= bytestream;
1635 d->bytestream_start= bytestream_start;
1638 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w block of bytes starting at `pix`, whose rows are line_size
 * bytes apart. The accumulation statement and the return are not visible in
 * this excerpt; judging by the name, the pixel values are summed -- confirm.
 */
1639 static int pix_sum(uint8_t * pix, int line_size, int w)
1644 for (i = 0; i < w; i++) {
1645 for (j = 0; j < w; j++) {
/* skip from the end of this block row to the start of the next one */
1649 pix += line_size - w;
1654 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w block of bytes starting at `pix`, whose rows are line_size
 * bytes apart, using the table pointer `sq`. The accumulation statement and
 * the return are not visible in this excerpt; judging by the names, squared
 * pixel values are summed -- confirm.
 */
1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
/* points 256 entries into ff_squareTbl.
 * NOTE(review): presumably so that negative indices are valid -- confirm. */
1658 uint32_t *sq = ff_squareTbl + 256;
1661 for (i = 0; i < w; i++) {
1662 for (j = 0; j < w; j ++) {
/* skip from the end of this block row to the start of the next one */
1666 pix += line_size - w;
/**
 * Writes one BlockNode value into every finest-level cell of s->block that
 * is covered by the block (x, y) at tree depth `level`.
 * The construction of `block` from l, cb, cr, mx, my, ref and type is on
 * lines not visible in this excerpt.
 */
1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* w: row length of s->block at the finest depth */
1672 const int w= s->b_width << s->block_max_depth;
/* rem_depth: number of levels between `level` and the finest depth */
1673 const int rem_depth= s->block_max_depth - level;
1674 const int index= (x + y*w) << rem_depth;
/* a block at this level covers block_w x block_w finest-level cells */
1675 const int block_w= 1<<rem_depth;
1688 for(j=0; j<block_w; j++){
1689 for(i=0; i<block_w; i++){
1690 s->block[index + i + j*w]= block;
/**
 * Sets up the source and reference plane pointers of the motion estimation
 * context `c`: c->src[0][i] is src[i] and c->ref[0][i] is ref[i] advanced by
 * a per-plane offset. The first offset entry and the loop header are on
 * lines not visible in this excerpt. `ref2` and `ref_index` are not used by
 * the visible lines.
 */
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696 const int offset[3]= {
1698 ((y*c->uvstride + x)>>1),
1699 ((y*c->uvstride + x)>>1),
1703 c->src[0][i]= src [i];
1704 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predicts a motion vector (*mx, *my) for reference frame `ref` as the
 * component-wise median (mid_pred) of the left, top and top-right
 * neighbours' vectors.
 */
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
/* single reference frame: neighbours are used unscaled */
1711 if(s->ref_frames == 1){
1712 *mx = mid_pred(left->mx, top->mx, tr->mx);
1713 *my = mid_pred(left->my, top->my, tr->my);
/* several reference frames: each neighbour's vector is first multiplied by
 * scale_mv_ref[ref][neighbour's ref] and rounded with (+128)>>8 */
1715 const int *scale = scale_mv_ref[ref];
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717 (top ->mx * scale[top ->ref] + 128) >>8,
1718 (tr ->mx * scale[tr ->ref] + 128) >>8);
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720 (top ->my * scale[top ->ref] + 128) >>8,
1721 (tr ->my * scale[tr ->ref] + 128) >>8);
1728 #define P_TOPRIGHT P[3]
1729 #define P_MEDIAN P[4]
1731 #define FLAG_QPEL 1 //must be 1
/**
 * Encoder-side decision for the block (x, y) at quadtree depth `level`.
 * The visible code
 *  - runs a motion search per reference frame,
 *  - trial-codes the block as inter (into p_buffer / p_state) and as intra
 *    (into i_buffer / i_state) to obtain rate-distortion scores,
 *  - below the maximum depth, recursively scores the four sub-blocks,
 *  - and copies the bits and coder state of the chosen intra or inter coding
 *    back into s->c / s->block_state.
 * Many control-flow lines (else branches, returns, some declarations) are
 * not visible in this excerpt.
 */
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output buffers and state copies for the two trial codings */
1734 uint8_t p_buffer[1024];
1735 uint8_t i_buffer[1024];
1736 uint8_t p_state[sizeof(s->block_state)];
1737 uint8_t i_state[sizeof(s->block_state)];
/* remember the real output position so the winning bits can be copied there */
1739 uint8_t *pbbak= s->c.bytestream;
1740 uint8_t *pbbak_start= s->c.bytestream_start;
1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742 const int w= s->b_width << s->block_max_depth;
1743 const int h= s->b_height << s->block_max_depth;
1744 const int rem_depth= s->block_max_depth - level;
1745 const int index= (x + y*w) << rem_depth;
1746 const int block_w= 1<<(LOG2_MB_SIZE - level);
1747 int trx= (x+1)<<rem_depth;
1748 int try= (y+1)<<rem_depth;
/* neighbouring blocks; null_block stands in for neighbours outside the picture */
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour prediction is taken from the left neighbour */
1755 int pl = left->color[0];
1756 int pcb= left->color[1];
1757 int pcr= left->color[2];
1761 const int stride= s->current_picture.linesize[0];
1762 const int uvstride= s->current_picture.linesize[1];
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1767 int16_t last_mv[3][2];
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769 const int shift= 1+qpel;
1770 MotionEstContext *c= &s->m.me;
/* coder contexts derived from the left and top neighbours */
1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773 int my_context= av_log2(2*FFABS(left->my - top->my));
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775 int ref, best_ref, ref_score, ref_mx, ref_my;
1777 assert(sizeof(s->block_state) >= 256);
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1783 // clip predictors / edge ?
/* motion vector candidates for the search */
1785 P_LEFT[0]= left->mx;
1786 P_LEFT[1]= left->my;
1789 P_TOPRIGHT[0]= tr->mx;
1790 P_TOPRIGHT[1]= tr->my;
1792 last_mv[0][0]= s->block[index].mx;
1793 last_mv[0][1]= s->block[index].my;
1794 last_mv[1][0]= right->mx;
1795 last_mv[1][1]= right->my;
1796 last_mv[2][0]= bottom->mx;
1797 last_mv[2][1]= bottom->my;
1804 assert(c-> stride == stride);
1805 assert(c->uvstride == uvstride);
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* allowed search range, relative to the block position, with a 16-2 margin */
1812 c->xmin = - x*block_w - 16+2;
1813 c->ymin = - y*block_w - 16+2;
1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* clip the candidate vectors to that range (in units scaled by `shift`) */
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* the condition choosing between the left and the median predictor is not
 * visible in this excerpt */
1829 c->pred_x= P_LEFT[0];
1830 c->pred_y= P_LEFT[1];
1832 c->pred_x = P_MEDIAN[0];
1833 c->pred_y = P_MEDIAN[1];
/* motion search against every reference frame; the best one is tracked
 * through `score` (update statements not visible here) */
1838 for(ref=0; ref<s->ref_frames; ref++){
1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1844 assert(ref_mx >= c->xmin);
1845 assert(ref_mx <= c->xmax);
1846 assert(ref_my >= c->ymin);
1847 assert(ref_my <= c->ymax);
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalise higher reference indices */
1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* optionally record the per-reference result */
1852 if(s->ref_mvs[ref]){
1853 s->ref_mvs[ref][index][0]= ref_mx;
1854 s->ref_mvs[ref][index][1]= ref_my;
1855 s->ref_scores[ref][index]= ref_score;
1857 if(score > ref_score){
1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
/* bit count of the real coder before this block, used as the baseline for
 * both trial codings */
1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* trial coding as an inter block into p_buffer, using a copy of the state */
1869 pc.bytestream_start=
1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871 memcpy(p_state, s->block_state, sizeof(s->block_state));
1873 if(level!=s->block_max_depth)
1874 put_rac(&pc, &p_state[4 + s_context], 1);
1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876 if(s->ref_frames > 1)
1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881 p_len= pc.bytestream - pc.bytestream_start;
1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
/* intra candidate: rounded mean of each plane; for luma, iscore is the sum
 * of squared differences from that mean (sum(p^2) - 2*l*sum + l^2*N) */
1884 block_s= block_w*block_w;
1885 sum = pix_sum(current_data[0], stride, block_w);
1886 l= (sum + block_s/2)/block_s;
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* chroma blocks have half the width, so a quarter of the samples */
1889 block_s= block_w*block_w>>2;
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891 cb= (sum + block_s/2)/block_s;
1892 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894 cr= (sum + block_s/2)/block_s;
1895 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
/* trial coding as an intra block into i_buffer, using a copy of the state */
1898 ic.bytestream_start=
1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900 memcpy(i_state, s->block_state, sizeof(s->block_state));
1901 if(level!=s->block_max_depth)
1902 put_rac(&ic, &i_state[4 + s_context], 1);
1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904 put_symbol(&ic, &i_state[32], l-pl , 1);
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907 i_len= ic.bytestream - ic.bytestream_start;
1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1910 // assert(score==256*256*256*64-1);
1911 assert(iscore < 255*255*256 + s->lambda2*10);
1912 assert(iscore >= 0);
1913 assert(l>=0 && l<=255);
1914 assert(pl>=0 && pl<=255);
/* scene change statistics from the intra and inter scores */
1917 int varc= iscore >> 8;
1918 int vard= score >> 8;
1919 if (vard <= 64 || vard < varc)
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1922 c->scene_change_score+= s->m.qscale;
/* below the maximum depth: also try splitting into four sub-blocks; the
 * split flag (0) is written to the real coder before recursing */
1925 if(level!=s->block_max_depth){
1926 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1933 if(score2 < score && score2 < iscore)
/* commit the intra coding: copy its bits to the real output position and
 * adopt its coder state */
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939 memcpy(pbbak, i_buffer, i_len);
1941 s->c.bytestream_start= pbbak_start;
1942 s->c.bytestream= pbbak + i_len;
1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* commit the inter coding in the same way */
1947 memcpy(pbbak, p_buffer, p_len);
1949 s->c.bytestream_start= pbbak_start;
1950 s->c.bytestream= pbbak + p_len;
1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Returns non-zero when blocks a and b are equivalent.
 * Two intra blocks are equivalent when all three colour components match.
 * Otherwise mx, my, ref and the BLOCK_INTRA bit of type must all match.
 */
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
/* OR of the differences is zero only if every difference is zero */
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the already-decided block (x, y) at quadtree depth `level` to the
 * range coder s->c, without any motion search: the block data is taken from
 * s->block[index].
 * Below the maximum depth a flag is written: 1 when the block and its
 * neighbours b+1, b+w, b+w+1 are equivalent (see same_block()), otherwise 0
 * followed by recursion into the four sub-blocks. Some control-flow lines
 * are not visible in this excerpt.
 */
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966 const int w= s->b_width << s->block_max_depth;
1967 const int rem_depth= s->block_max_depth - level;
1968 const int index= (x + y*w) << rem_depth;
1969 int trx= (x+1)<<rem_depth;
1970 BlockNode *b= &s->block[index];
/* neighbouring blocks; null_block stands in for neighbours outside the picture */
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour prediction is taken from the left neighbour */
1975 int pl = left->color[0];
1976 int pcb= left->color[1];
1977 int pcr= left->color[2];
/* coder contexts; unlike encode_q_branch(), the 16*!!ref term is folded
 * into mx_context / my_context here */
1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1989 if(level!=s->block_max_depth){
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* intra block: type flag 1, then the colour differences to the prediction */
2001 if(b->type & BLOCK_INTRA){
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* inter block: type flag 0, optional reference index, then the motion
 * vector differences to the prediction */
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011 if(s->ref_frames > 1)
2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Decodes the block (x, y) at quadtree depth `level` from the range coder
 * s->c and stores the result with set_blocks().
 * At the maximum depth, or when the decoded flag is non-zero, a leaf is
 * read (intra colours or inter reference/motion vector, each as a
 * difference to the prediction from the neighbours). The four recursive
 * calls at the end handle the four sub-blocks; presumably they form the
 * else branch (the "else" itself is not visible in this excerpt).
 */
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2020 const int w= s->b_width << s->block_max_depth;
2021 const int rem_depth= s->block_max_depth - level;
2022 const int index= (x + y*w) << rem_depth;
2023 int trx= (x+1)<<rem_depth;
/* neighbouring blocks; null_block stands in for neighbours outside the picture */
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* start from the left neighbour's colours; decoded differences are added */
2037 int l = left->color[0];
2038 int cb= left->color[1];
2039 int cr= left->color[2];
2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the second term is multiplied by 0 and therefore has no effect */
2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* intra: predicted vector for reference 0 plus colour differences */
2048 pred_mv(s, &mx, &my, 0, left, top, tr);
2049 l += get_symbol(&s->c, &s->block_state[32], 1);
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* inter: optional reference index, then motion vector differences */
2053 if(s->ref_frames > 1)
2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2055 pred_mv(s, &mx, &my, ref, left, top, tr);
2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Writes the block tree of the current frame. Each root position (x, y) is
 * coded with encode_q_branch2() when the motion estimation method is ME_ITER
 * or search is 0, and with encode_q_branch() otherwise.
 */
2068 static void encode_blocks(SnowContext *s, int search){
     // NOTE(review): the statement controlled by this condition is not visible here — presumably the iterative search; confirm
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
     // reports an error when fewer than w*MB_SIZE*MB_SIZE*3 bytes of output space remain
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2082 if(s->avctx->me_method == ME_ITER || !search)
2083 encode_q_branch2(s, 0, x, y);
2085 encode_q_branch (s, 0, x, y);
/**
 * Decodes the block tree by calling decode_q_branch() at level 0 for the
 * root positions (x, y); the surrounding loops are not visible here.
 */
2090 static void decode_blocks(SnowContext *s){
2097 decode_q_branch(s, 0, x, y);
/**
 * Interpolates a b_w x b_h block in two passes. The first loop runs over
 * b_h+5 rows and blends using dx; the second loop reads six vertically
 * adjacent rows from tmp and blends using dy. dx and dy are compared against
 * 8 and 16, i.e. they are treated as sixteenth-sample positions.
 * NOTE(review): the sample loads of the first pass and the stores of both
 * passes are not visible here — presumably the first pass filters src
 * horizontally into tmp and the second writes dst; confirm.
 */
2102 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2105 for(y=0; y < b_h+5; y++){
2106 for(x=0; x < b_w; x++){
2113 // int am= 9*(a1+a2) - (a0+a3);
     // 6-tap filter with taps (1,-5,20,20,-5,1); the taps sum to 32
2114 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2115 // int am= 18*(a2+a3) - 2*(a1+a4);
2116 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2117 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2119 // if(b_w==16) am= 8*(a1+a2);
     // linear blend between the nearest full sample (a2 below 8, a3 above) and
     // the filter output: dx==0 yields a2, dx==8 yields the filter output / 32
2121 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2122 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2124 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
     // clip: values outside 0..255 become 0 when negative and all-ones (255 in 8 bits) when too large
2125 if(am&(~255)) am= ~(am>>31);
2129 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2130 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2131 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2132 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
     // step tmp back by the b_h+5 rows covered by the first pass
2137 tmp -= (b_h+5)*stride;
2139 for(y=0; y < b_h; y++){
2140 for(x=0; x < b_w; x++){
     // six vertically adjacent intermediate samples
2141 int a0= tmp[x + 0*stride];
2142 int a1= tmp[x + 1*stride];
2143 int a2= tmp[x + 2*stride];
2144 int a3= tmp[x + 3*stride];
2145 int a4= tmp[x + 4*stride];
2146 int a5= tmp[x + 5*stride];
2147 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2148 // int am= 18*(a2+a3) - 2*(a1+a4);
2149 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2150 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2152 // if(b_w==16) am= 8*(a1+a2);
     // same blend as in the first pass, now driven by dy
2154 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2155 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2157 if(am&(~255)) am= ~(am>>31);
2160 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2161 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2162 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2163 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2168 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w) defines a wrapper named mc_block_hpel<dx><dy><b_w>() that
 * calls mc_block() for a b_w x b_w block with the given fixed dx and dy.
 * src is moved back by 2 columns and 2 rows before the call, and the
 * intermediate buffer tmp is a variable length array of stride*(b_w+5) bytes.
 * The h parameter is not used in the lines visible here.
 */
2171 #define mca(dx,dy,b_w)\
2172 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2173 uint8_t tmp[stride*(b_w+5)];\
2175 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Writes the prediction of one block into dst (b_w x b_h samples, row pitch
 * stride). Intra blocks are filled with block->color[plane_index]; other
 * blocks are motion compensated from s->last_picture[block->ref].
 * sx, sy give the block position in the plane, w and h the plane size used
 * for the border test, tmp is scratch space.
 */
2187 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2188 if(block->type & BLOCK_INTRA){
2190 const int color = block->color[plane_index];
     // colour byte replicated into all four bytes of a 32-bit word
2191 const int color4= color*0x01010101;
     // NOTE(review): the conditions selecting between the fill loops below are not
     // visible here — presumably chosen by b_w (32, 16, 8, 4 bytes per row); confirm
     // 8 word stores = 32 bytes per row
2193 for(y=0; y < b_h; y++){
2194 *(uint32_t*)&dst[0 + y*stride]= color4;
2195 *(uint32_t*)&dst[4 + y*stride]= color4;
2196 *(uint32_t*)&dst[8 + y*stride]= color4;
2197 *(uint32_t*)&dst[12+ y*stride]= color4;
2198 *(uint32_t*)&dst[16+ y*stride]= color4;
2199 *(uint32_t*)&dst[20+ y*stride]= color4;
2200 *(uint32_t*)&dst[24+ y*stride]= color4;
2201 *(uint32_t*)&dst[28+ y*stride]= color4;
     // 4 word stores = 16 bytes per row
2204 for(y=0; y < b_h; y++){
2205 *(uint32_t*)&dst[0 + y*stride]= color4;
2206 *(uint32_t*)&dst[4 + y*stride]= color4;
2207 *(uint32_t*)&dst[8 + y*stride]= color4;
2208 *(uint32_t*)&dst[12+ y*stride]= color4;
     // 2 word stores = 8 bytes per row
2211 for(y=0; y < b_h; y++){
2212 *(uint32_t*)&dst[0 + y*stride]= color4;
2213 *(uint32_t*)&dst[4 + y*stride]= color4;
     // 1 word store = 4 bytes per row
2216 for(y=0; y < b_h; y++){
2217 *(uint32_t*)&dst[0 + y*stride]= color4;
     // generic byte-wise fill for any b_w
2220 for(y=0; y < b_h; y++){
2221 for(x=0; x < b_w; x++){
2222 dst[x + y*stride]= color;
     // inter prediction: reference plane and scaled motion vector
2227 uint8_t *src= s->last_picture[block->ref].data[plane_index];
     // plane 0 uses twice the scale of the other planes
2228 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2229 int mx= block->mx*scale;
2230 int my= block->my*scale;
     // the low 4 bits of the scaled vector are the fractional position passed to the interpolators
2231 const int dx= mx&15;
2232 const int dy= my&15;
     // maps b_w of 16, 8, 4 to table index 0, 1, 2 (b_w==32 also gives 0 and is handled separately below)
2233 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2236 src += sx + sy*stride;
     // too close to the plane border (or outside): build an edge-extended copy in tmp
2237 if( (unsigned)sx >= w - b_w - 4
2238 || (unsigned)sy >= h - b_h - 4){
2239 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2242 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2243 // assert(!(b_w&(b_w-1)));
2244 assert(b_w>1 && b_h>1);
2245 assert(tab_index>=0 && tab_index<4 || b_w==32);
     // mc_block() handles positions that are not a multiple of 4 and block shapes
     // that are not power-of-two squares or 2:1 / 1:2 rectangles; everything else
     // goes through the put_h264_qpel_pixels_tab functions
2246 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2247 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
     // two calls per 16-row step, 16 columns apart
2250 for(y=0; y<b_h; y+=16){
2251 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2252 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2255 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
     // block twice as wide as high: two calls side by side, b_h columns apart
2256 else if(b_w==2*b_h){
2257 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2258 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
     // remaining shape: two calls stacked vertically, b_w rows apart
2261 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2262 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Combines the four block predictions in block[0..3] with the four quadrants
 * of the obmc weight table and applies the result to the slice buffer line
 * of each row (src_y + y), at column offset src_x.
 * NOTE(review): the branch on add is not visible here — presumably the lines
 * writing dst8 form the add path and the final subtraction the other; confirm.
 */
2267 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2268 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2271 for(y=0; y<b_h; y++){
2272 //FIXME ugly misuse of obmc_stride
     // the four weight rows: obmc1 at row y, obmc2 half a row to the right,
     // obmc3 obmc_stride/2 rows further down, obmc4 down and to the right
2273 const uint8_t *obmc1= obmc + y*obmc_stride;
2274 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2275 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2276 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2277 dst = slice_buffer_get_line(sb, src_y + y);
2278 for(x=0; x<b_w; x++){
     // weighted sum of the four predictions; note the reversed pairing obmc1 with block[3] ... obmc4 with block[0]
2279 int v= obmc1[x] * block[3][x + y*src_stride]
2280 +obmc2[x] * block[2][x + y*src_stride]
2281 +obmc3[x] * block[1][x + y*src_stride]
2282 +obmc4[x] * block[0][x + y*src_stride];
     // rescale from LOG2_OBMC_MAX to FRAC_BITS fractional bits
2284 v <<= 8 - LOG2_OBMC_MAX;
2286 v >>= 8 - FRAC_BITS;
     // add the buffered coefficient, round to an integer sample, clip to 0..255 and store it in dst8
2289 v += dst[x + src_x];
2290 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2291 if(v&(~255)) v= ~(v>>31);
2292 dst8[x + y*src_stride] = v;
     // subtract the prediction from the buffered value
2294 dst[x + src_x] -= v;
2300 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Applies the overlapped prediction for the window at (src_x, src_y) of size
 * b_w x b_h: the predictions of the four BlockNodes lt, rt, lb and rb around
 * block position (b_x, b_y) are produced with pred_block() and combined with
 * the obmc weights. The result is added to or subtracted from dst according
 * to add; s->dsp.inner_add_yblock() is called with the slice buffer sb.
 * NOTE(review): several conditions and preprocessor lines of this function
 * are not visible here; the comments below only describe the visible lines.
 */
2301 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2302 const int b_width = s->b_width << s->block_max_depth;
2303 const int b_height= s->b_height << s->block_max_depth;
2304 const int b_stride= b_width;
     // the four blocks whose predictions overlap in this window: left/right top, left/right bottom
2305 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2306 BlockNode *rt= lt+1;
2307 BlockNode *lb= lt+b_stride;
2308 BlockNode *rb= lb+1;
2310 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
     // scratch area as a variable length array on the stack
2311 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
     // special cases for block positions on the right and bottom edge of the block grid (bodies not visible here)
2318 }else if(b_x + 1 >= b_width){
2325 }else if(b_y + 1 >= b_height){
     // cropping of the window against the plane borders (most of the adjustments are not visible here)
2330 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2333 if(!sliced && !offset_dst)
2336 }else if(src_x + b_w > w){
2340 obmc -= src_y*obmc_stride;
2342 if(!sliced && !offset_dst)
2343 dst -= src_y*dst_stride;
2345 }else if(src_y + b_h> h){
     // nothing left of the window after cropping
2349 if(b_w<=0 || b_h<=0) return;
2351 assert(src_stride > 2*MB_SIZE + 5);
     // with offset_dst set (non-sliced case) dst is advanced to the window position; dst8 always is
2352 if(!sliced && offset_dst)
2353 dst += src_x + src_y*dst_stride;
2354 dst8+= src_x + src_y*src_stride;
2355 // src += src_x + src_y*src_stride;
2357 ptmp= tmp + 3*tmp_step;
     // one prediction per neighbour; the same_block() tests precede each further pred_block() call
     // NOTE(review): presumably an identical neighbour's prediction is reused instead of recomputed — the reuse statements are not visible; confirm
2360 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2362 if(same_block(lt, rt)){
2367 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2370 if(same_block(lt, lb)){
2372 }else if(same_block(rt, lb)){
2377 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2380 if(same_block(lt, rb) ){
2382 }else if(same_block(rt, rb)){
2384 }else if(same_block(lb, rb)){
2388 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
     // NOTE(review): the four per-neighbour loops below and the combined loop at the end both
     // apply the weighted predictions to dst; which variant is compiled is not visible here — confirm
     // first obmc quadrant (no offset) weights block[3]
2391 for(y=0; y<b_h; y++){
2392 for(x=0; x<b_w; x++){
2393 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2394 if(add) dst[x + y*dst_stride] += v;
2395 else dst[x + y*dst_stride] -= v;
     // quadrant shifted right by obmc_stride/2 weights block[2]
2398 for(y=0; y<b_h; y++){
2399 uint8_t *obmc2= obmc + (obmc_stride>>1);
2400 for(x=0; x<b_w; x++){
2401 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2402 if(add) dst[x + y*dst_stride] += v;
2403 else dst[x + y*dst_stride] -= v;
     // quadrant shifted down by obmc_stride/2 rows weights block[1]
2406 for(y=0; y<b_h; y++){
2407 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2408 for(x=0; x<b_w; x++){
2409 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2410 if(add) dst[x + y*dst_stride] += v;
2411 else dst[x + y*dst_stride] -= v;
     // quadrant shifted down and right weights block[0]
2414 for(y=0; y<b_h; y++){
2415 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2416 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2417 for(x=0; x<b_w; x++){
2418 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2419 if(add) dst[x + y*dst_stride] += v;
2420 else dst[x + y*dst_stride] -= v;
     // slice-buffer variant, dispatched through the dsp context
2427 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2428 STOP_TIMER("inner_add_yblock")
     // plain-buffer variant: same arithmetic as ff_snow_inner_add_yblock(), addressing dst by dst_stride
2430 for(y=0; y<b_h; y++){
2431 //FIXME ugly misuse of obmc_stride
2432 const uint8_t *obmc1= obmc + y*obmc_stride;
2433 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2434 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2435 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2436 for(x=0; x<b_w; x++){
2437 int v= obmc1[x] * block[3][x + y*src_stride]
2438 +obmc2[x] * block[2][x + y*src_stride]
2439 +obmc3[x] * block[1][x + y*src_stride]
2440 +obmc4[x] * block[0][x + y*src_stride];
     // rescale from LOG2_OBMC_MAX to FRAC_BITS fractional bits
2442 v <<= 8 - LOG2_OBMC_MAX;
2444 v >>= 8 - FRAC_BITS;
     // add the buffered value, round, clip to 0..255 and store the sample in dst8
2447 v += dst[x + y*dst_stride];
2448 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2449 if(v&(~255)) v= ~(v>>31);
2450 dst8[x + y*src_stride] = v;
     // subtract the prediction from dst
2452 dst[x + y*dst_stride] -= v;
/**
 * Applies the block prediction to one row of blocks (mb_y) of a plane using
 * the slice buffer sb. For keyframes, or when bit 512 of the debug flags is
 * set, only a constant 128 offset is handled instead of a real prediction.
 * Otherwise add_yblock() is called in sliced mode for every mb_x in 0..mb_w.
 */
2459 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2460 Plane *p= &s->plane[plane_index];
2461 const int mb_w= s->b_width << s->block_max_depth;
2462 const int mb_h= s->b_height << s->block_max_depth;
     // block size at the finest depth; halved for planes other than 0
2464 int block_size = MB_SIZE >> s->block_max_depth;
2465 int block_w = plane_index ? block_size/2 : block_size;
     // planes other than 0 use the weight table of the next depth
2466 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2467 int obmc_stride= plane_index ? block_size : 2*block_size;
2468 int ref_stride= s->current_picture.linesize[plane_index];
2469 uint8_t *dst8= s->current_picture.data[plane_index];
2474 if(s->keyframe || (s->avctx->debug&512)){
     // rows of this block row, limited to the plane height
2479 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2481 // DWTELEM * line = slice_buffer_get_line(sb, y);
2482 IDWTELEM * line = sb->line[y];
2485 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
     // add the 128 offset plus the rounding term, clip to 0..255 and store the sample
2486 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2488 if(v&(~255)) v= ~(v>>31);
2489 dst8[x + y*ref_stride]= v;
2493 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2495 // DWTELEM * line = slice_buffer_get_line(sb, y);
2496 IDWTELEM * line = sb->line[y];
     // remove the 128 offset from the buffered values
2499 line[x] -= 128 << FRAC_BITS;
2500 // buf[x + y*w]-= 128<<FRAC_BITS;
     // one call per overlap window; mb_x runs to mb_w inclusive and each window starts half a block before the block origin
2508 for(mb_x=0; mb_x<=mb_w; mb_x++){
2511 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2512 block_w*mb_x - block_w/2,
2513 block_w*mb_y - block_w/2,
2516 w, ref_stride, obmc_stride,
2518 add, 0, plane_index);
2520 STOP_TIMER("add_yblock")
2523 STOP_TIMER("predict_slice")
/**
 * Applies the block prediction to one row of blocks (mb_y) of a plane stored
 * in the plain buffer buf (row pitch w). For keyframes, or when bit 512 of
 * the debug flags is set, only a constant 128 offset is handled. Otherwise
 * add_yblock() is called in non-sliced mode for every mb_x in 0..mb_w.
 */
2526 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2527 Plane *p= &s->plane[plane_index];
2528 const int mb_w= s->b_width << s->block_max_depth;
2529 const int mb_h= s->b_height << s->block_max_depth;
     // block size at the finest depth; halved for planes other than 0
2531 int block_size = MB_SIZE >> s->block_max_depth;
2532 int block_w = plane_index ? block_size/2 : block_size;
2533 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2534 const int obmc_stride= plane_index ? block_size : 2*block_size;
2535 int ref_stride= s->current_picture.linesize[plane_index];
2536 uint8_t *dst8= s->current_picture.data[plane_index];
2541 if(s->keyframe || (s->avctx->debug&512)){
     // rows of this block row, limited to the plane height
2546 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
     // add the 128 offset plus the rounding term, clip to 0..255 and store the sample
2548 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2550 if(v&(~255)) v= ~(v>>31);
2551 dst8[x + y*ref_stride]= v;
2555 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
     // remove the 128 offset from the buffered values
2557 buf[x + y*w]-= 128<<FRAC_BITS;
     // one call per overlap window; mb_x runs to mb_w inclusive and each window starts half a block before the block origin
2565 for(mb_x=0; mb_x<=mb_w; mb_x++){
2568 add_yblock(s, 0, NULL, buf, dst8, obmc,
2569 block_w*mb_x - block_w/2,
2570 block_w*mb_y - block_w/2,
2573 w, ref_stride, obmc_stride,
2575 add, 1, plane_index);
2577 STOP_TIMER("add_yblock")
2580 STOP_TIMER("predict_slice")
/**
 * Applies the block prediction to a whole plane by calling predict_slice()
 * for every block row; mb_y runs from 0 to mb_h inclusive.
 */
2583 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2584 const int mb_h= s->b_height << s->block_max_depth;
2586 for(mb_y=0; mb_y<=mb_h; mb_y++)
2587 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Computes a colour value in 0..255 for block (mb_x, mb_y) of the given
 * plane. The block is temporarily marked intra with colour 0, the prediction
 * contributed by the surrounding blocks is collected into a scratch buffer
 * with add_yblock(), and the return value is the obmc-weighted ratio ab/aa
 * of the remaining source signal, rounded and clipped.
 * NOTE(review): a copy of the block is taken in backup, but the statement
 * restoring it is not visible here — confirm.
 */
2590 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2592 Plane *p= &s->plane[plane_index];
2593 const int block_size = MB_SIZE >> s->block_max_depth;
2594 const int block_w = plane_index ? block_size/2 : block_size;
2595 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2596 const int obmc_stride= plane_index ? block_size : 2*block_size;
2597 const int ref_stride= s->current_picture.linesize[plane_index];
2598 uint8_t *src= s-> input_picture.data[plane_index];
     // per-plane region of the shared scratchpad, block_size*block_size*4 elements apart
2599 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2600 const int b_stride = s->b_width << s->block_max_depth;
2601 const int w= p->width;
2602 const int h= p->height;
2603 int index= mb_x + mb_y*b_stride;
2604 BlockNode *b= &s->block[index];
2605 BlockNode backup= *b;
     // make the block contribute nothing itself: intra with colour 0
2609 b->type|= BLOCK_INTRA;
2610 b->color[plane_index]= 0;
2611 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
     // i selects one of the 2x2 windows around the block: (i&1) is the column, (i>>1) the row
2614 int mb_x2= mb_x + (i &1) - 1;
2615 int mb_y2= mb_y + (i>>1) - 1;
2616 int x= block_w*mb_x2 + block_w/2;
2617 int y= block_w*mb_y2 + block_w/2;
     // add is 0 here, so the prediction is subtracted from the zeroed scratch buffer
2619 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2620 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
     // only the part of the window inside the plane is evaluated
2622 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2623 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
     // position relative to the top-left corner of this block's overlap window
2624 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2625 int obmc_v= obmc[index];
     // windows crossing a plane border also receive the weight of the adjacent table entry one block away
2627 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2628 if(x<0) obmc_v += obmc[index + block_w];
2629 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2630 if(x+block_w>w) obmc_v += obmc[index - block_w];
2631 //FIXME precalc this or simplify it somehow else
     // the scratch buffer holds the negated prediction; add the rounding term before the shift below
2633 d = -dst[index] + (1<<(FRAC_BITS-1));
     // weighted sums for the ratio returned below
2635 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2636 aa += obmc_v * obmc_v; //FIXME precalculate this
     // rounded division, clipped to the 8-bit range
2642 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Returns an estimate of the cost of coding block (x, y) from av_log2() of
 * its differences to neighbouring blocks: colour differences to the left
 * neighbour for intra blocks, dmx/dmy and the reference index otherwise.
 * w is the width in blocks used when locating the top-right neighbour.
 */
2645 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2646 const int b_stride = s->b_width << s->block_max_depth;
2647 const int b_height = s->b_height<< s->block_max_depth;
2648 int index= x + y*b_stride;
2649 const BlockNode *b = &s->block[index];
     // neighbours outside the grid fall back to null_block (left/top) or to another neighbour (tl/tr)
2650 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2651 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2652 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2653 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2655 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2656 // int my_context= av_log2(2*FFABS(left->my - top->my));
     // positions outside the block grid (the controlled statement is not visible here)
2658 if(x<0 || x>=b_stride || y>=b_height)
2665 00001XXXX 15-30 8-15
2667 //FIXME try accurate rate
2668 //FIXME intra and inter predictors if surrounding blocks aren't the same type
2669 if(b->type & BLOCK_INTRA){
     // intra: constant 3 plus twice the summed log2 sizes of the three colour differences to the left neighbour
2670 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2671 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2672 + av_log2(2*FFABS(left->color[2] - b->color[2])));
     // inter: pred_mv() fills dmx/dmy
     // NOTE(review): any adjustment of dmx/dmy between this call and the return is not visible here — confirm
2674 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2677 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2678 + av_log2(2*FFABS(dmy))
2679 + av_log2(2*b->ref));
/**
 * Returns the rate-distortion score "distortion + rate*penalty_factor" of
 * block (mb_x, mb_y) with its current parameters. The block's own prediction
 * is produced with pred_block() over a window of 2*block_w x 2*block_w,
 * weighted with obmc_edged and combined with the values in the scratchpad
 * pred; the distortion is measured against the input picture and the rate
 * is summed from get_block_bits().
 */
2683 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2684 Plane *p= &s->plane[plane_index];
2685 const int block_size = MB_SIZE >> s->block_max_depth;
2686 const int block_w = plane_index ? block_size/2 : block_size;
2687 const int obmc_stride= plane_index ? block_size : 2*block_size;
2688 const int ref_stride= s->current_picture.linesize[plane_index];
2689 uint8_t *dst= s->current_picture.data[plane_index];
2690 uint8_t *src= s-> input_picture.data[plane_index];
2691 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
     // both scratch buffers are variable length arrays sized from the picture row pitch
2692 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2693 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2694 const int b_stride = s->b_width << s->block_max_depth;
2695 const int b_height = s->b_height<< s->block_max_depth;
2696 const int w= p->width;
2697 const int h= p->height;
2700 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
     // top-left corner of the overlap window, half a block before the block origin
2701 int sx= block_w*mb_x - block_w/2;
2702 int sy= block_w*mb_y - block_w/2;
     // window range clipped to the plane
2703 int x0= FFMAX(0,-sx);
2704 int y0= FFMAX(0,-sy);
2705 int x1= FFMIN(block_w*2, w-sx);
2706 int y1= FFMIN(block_w*2, h-sy);
2709 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2711 for(y=y0; y<y1; y++){
2712 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2713 const IDWTELEM *pred1 = pred + y*obmc_stride;
2714 uint8_t *cur1 = cur + y*ref_stride;
2715 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2716 for(x=x0; x<x1; x++){
     // weight this block's prediction and bring it to FRAC_BITS precision (shift direction depends on the two constants)
2717 #if FRAC_BITS >= LOG2_OBMC_MAX
2718 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2720 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
     // add the scratchpad value, drop the fractional bits and clip to 0..255
     // NOTE(review): the store of v is not visible here — presumably into dst1[x]; confirm
2722 v = (v + pred1[x]) >> FRAC_BITS;
2723 if(v&(~255)) v= ~(v>>31);
2728 /* copy the regions where obmc[] = (uint8_t)256 */
     // applies only to the four corner blocks of the grid
2729 if(LOG2_OBMC_MAX == 8
2730 && (mb_x == 0 || mb_x == b_stride-1)
2731 && (mb_y == 0 || mb_y == b_height-1)){
2740 for(y=y0; y<y1; y++)
2741 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2745 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2746 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2747 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2748 * so improving the score of one block is not strictly guaranteed to
2749 * improve the score of the whole frame, so iterative motion est
2750 * doesn't always converge. */
     // wavelet comparison functions are called directly for FF_CMP_W97 / FF_CMP_W53
2751 if(s->avctx->me_cmp == FF_CMP_W97)
2752 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2753 else if(s->avctx->me_cmp == FF_CMP_W53)
2754 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
     // otherwise the distortion is summed over 16x16 tiles addressed by i ...
2758 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2759 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
     // ... or taken from a single comparison over the whole window
2764 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
     // rate: get_block_bits() summed over this block and neighbours selected by i
2773 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2775 if(mb_x == b_stride-2)
2776 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2778 return distortion + rate*penalty_factor;
/**
 * Returns the rate-distortion score "distortion + rate*penalty_factor" for
 * the 2x2 group of blocks whose top-left member is (mb_x, mb_y). The
 * prediction is rebuilt with add_yblock() for a 3x3 set of windows around
 * the group, compared against the input picture with me_cmp, and the rate
 * is summed from get_block_bits().
 */
2781 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2783 Plane *p= &s->plane[plane_index];
2784 const int block_size = MB_SIZE >> s->block_max_depth;
2785 const int block_w = plane_index ? block_size/2 : block_size;
2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
2788 const int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *dst= s->current_picture.data[plane_index];
2790 uint8_t *src= s-> input_picture.data[plane_index];
     // all-zero buffer handed to add_yblock() as dst, with a dst_stride of 0
2791 static const IDWTELEM zero_dst[4096]; //FIXME
2792 const int b_stride = s->b_width << s->block_max_depth;
2793 const int w= p->width;
2794 const int h= p->height;
2797 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
     // i selects one of the 3x3 windows: (i%3) is the column, (i/3) the row
2800 int mb_x2= mb_x + (i%3) - 1;
2801 int mb_y2= mb_y + (i/3) - 1;
2802 int x= block_w*mb_x2 + block_w/2;
2803 int y= block_w*mb_y2 + block_w/2;
     // add is 1 here, so the clipped prediction is written to the picture dst (passed as dst8)
2805 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2806 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2808 //FIXME find a cleaner/simpler way to skip the outside stuff
     // parts of the window outside the plane are filled from src so that they add no distortion
2809 for(y2= y; y2<0; y2++)
2810 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2811 for(y2= h; y2<y+block_w; y2++)
2812 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
     // NOTE(review): the conditions guarding the two column copies below are not visible here — the -x length only makes sense for x<0; confirm
2814 for(y2= y; y2<y+block_w; y2++)
2815 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2818 for(y2= y; y2<y+block_w; y2++)
2819 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
     // me_cmp index 1 is used for 8-wide windows, index 0 for 16-wide ones
2822 assert(block_w== 8 || block_w==16);
2823 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
     // merged: all four blocks of the group carry identical parameters
2827 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2828 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2836 rate = get_block_bits(s, mb_x, mb_y, 2);
     // a merged group skips the first four dxy entries, which are the group's own blocks
2837 for(i=merged?4:0; i<9; i++){
2838 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2839 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2842 return distortion + rate*penalty_factor;
/**
 * Tries candidate parameters p for block (mb_x, mb_y) and scores them with
 * get_block_rd() on plane 0. With intra set, p holds three colour values;
 * otherwise p[0] and p[1] form the key of an s->me_cache lookup.
 * NOTE(review): the comparison against *best_rd, the restore from backup and
 * the return statements are not visible here — confirm the return contract.
 */
2845 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2846 const int b_stride= s->b_width << s->block_max_depth;
2847 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
     // copy of the block's parameters taken before the trial
2848 BlockNode backup= *block;
2849 int rd, index, value;
2851 assert(mb_x>=0 && mb_y>=0);
2852 assert(mb_x<b_stride);
     // intra candidate: install the three colour values and set the intra flag
2855 block->color[0] = p[0];
2856 block->color[1] = p[1];
2857 block->color[2] = p[2];
2858 block->type |= BLOCK_INTRA;
     // cache slot from p[0]/p[1]; the stored value combines the current generation, p[0], p[1] and block->ref
2860 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2861 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
     // a matching slot means this candidate was already seen (the controlled statement is not visible here)
2862 if(s->me_cache[index] == value)
2864 s->me_cache[index]= value;
2868 block->type &= ~BLOCK_INTRA;
2871 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2883 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Convenience wrapper around check_block() for non-intra candidates: packs
 * p0 and p1 into a local two-element array and passes it with intra = 0.
 * Returns whatever check_block() returns.
 */
2884 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2885 int p[2] = {p0, p1};
2886 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Tries one set of non-intra parameters for the whole 2x2 group of blocks
 * whose top-left member is (mb_x, mb_y): the first block is copied over the
 * other three and the group is scored with get_4block_rd() on plane 0.
 * p0 and p1 form the key of an s->me_cache lookup.
 * NOTE(review): the assignment of p0/p1/ref to the block, the comparison
 * against *best_rd and the return statements are not visible here — confirm.
 */
2889 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2890 const int b_stride= s->b_width << s->block_max_depth;
2891 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
     // copies of all four blocks of the group, taken before the trial
2892 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2893 int rd, index, value;
2895 assert(mb_x>=0 && mb_y>=0);
2896 assert(mb_x<b_stride);
     // both coordinates must be even, i.e. the top-left block of a 2x2 group
2897 assert(((mb_x|mb_y)&1) == 0);
     // cache slot from p0/p1; the stored value combines the current generation, p0, p1 and block->ref
2899 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2900 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
     // a matching slot means this candidate was already seen (the controlled statement is not visible here)
2901 if(s->me_cache[index] == value)
2903 s->me_cache[index]= value;
2908 block->type &= ~BLOCK_INTRA;
     // give all four blocks of the group the same parameters
2909 block[1]= block[b_stride]= block[b_stride+1]= *block;
2911 rd= get_4block_rd(s, mb_x, mb_y, 0);
     // put the four saved blocks back (the condition for doing so is not visible here)
2918 block[0]= backup[0];
2919 block[1]= backup[1];
2920 block[b_stride]= backup[2];
2921 block[b_stride+1]= backup[3];
/**
 * Iterative motion estimation over the block grid at the finest depth.
 * After an initial encode_q_branch() run over all root blocks, up to 25
 * passes revisit every block, scoring candidates with check_block() and
 * check_block_inter(). When s->block_max_depth is 1, a final pass over 2x2
 * groups tries shared parameters with check_4block_inter().
 * NOTE(review): many control-flow lines of this function are not visible
 * here; the comments below only describe the visible statements.
 */
2926 static void iterative_me(SnowContext *s){
2927 int pass, mb_x, mb_y;
2928 const int b_width = s->b_width << s->block_max_depth;
2929 const int b_height= s->b_height << s->block_max_depth;
2930 const int b_stride= b_width;
     // copies of the range coder and of the block_state contexts, taken before the trial encode below;
     // block_state is copied back afterwards
2934 RangeCoder r = s->c;
2935 uint8_t state[sizeof(s->block_state)];
2936 memcpy(state, s->block_state, sizeof(s->block_state));
2937 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2938 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2939 encode_q_branch(s, 0, mb_x, mb_y);
2941 memcpy(s->block_state, state, sizeof(s->block_state));
2944 for(pass=0; pass<25; pass++){
2947 for(mb_y= 0; mb_y<b_height; mb_y++){
2948 for(mb_x= 0; mb_x<b_width; mb_x++){
2949 int dia_change, i, j, ref;
2950 int best_rd= INT_MAX, ref_rd;
2951 BlockNode backup, ref_b;
2952 const int index= mb_x + mb_y * b_stride;
2953 BlockNode *block= &s->block[index];
     // the eight neighbours of the block; NULL where the neighbour lies outside the grid
2954 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
2955 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
2956 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
2957 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
2958 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
2959 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
2960 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
2961 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
2962 const int b_w= (MB_SIZE >> s->block_max_depth);
     // per-block copy of the obmc weights, adjusted below for blocks on the grid border
2963 uint8_t obmc_edged[b_w*2][b_w*2];
     // after the first pass, blocks still flagged BLOCK_OPT are tested here (the controlled statement is not visible)
2965 if(pass && (block->type & BLOCK_OPT))
2967 block->type |= BLOCK_OPT;
     // new cache generation; the cache is cleared when the generation counter is 0
2971 if(!s->me_cache_generation)
2972 memset(s->me_cache, 0, sizeof(s->me_cache));
2973 s->me_cache_generation += 1<<22;
2978 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
     // border handling: each loop fills the half of the window facing a border with the sum of two weights
     // (the conditions for the left and top cases are not visible here)
2980 for(y=0; y<b_w*2; y++)
2981 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
2982 if(mb_x==b_stride-1)
2983 for(y=0; y<b_w*2; y++)
2984 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
2986 for(x=0; x<b_w*2; x++)
2987 obmc_edged[0][x] += obmc_edged[b_w-1][x];
2988 for(y=1; y<b_w; y++)
2989 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
2991 if(mb_y==b_height-1){
2992 for(x=0; x<b_w*2; x++)
2993 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
2994 for(y=b_w; y<b_w*2-1; y++)
2995 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
2999 //skip stuff outside the picture
     // for border blocks, the parts of the window outside the plane are copied from the input picture into the current picture
3000 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3002 uint8_t *src= s-> input_picture.data[0];
3003 uint8_t *dst= s->current_picture.data[0];
3004 const int stride= s->current_picture.linesize[0];
3005 const int block_w= MB_SIZE >> s->block_max_depth;
3006 const int sx= block_w*mb_x - block_w/2;
3007 const int sy= block_w*mb_y - block_w/2;
3008 const int w= s->plane[0].width;
3009 const int h= s->plane[0].height;
3013 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3014 for(y=h; y<sy+block_w*2; y++)
3015 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3017 for(y=sy; y<sy+block_w*2; y++)
3018 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3020 if(sx+block_w*2 > w){
3021 for(y=sy; y<sy+block_w*2; y++)
3022 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3026 // intra(black) = neighbors' contribution to the current block
3028 color[i]= get_dc(s, mb_x, mb_y, i);
3030 // get previous score (cannot be cached due to OBMC)
     // re-score the block's current parameters first so that best_rd starts from the existing choice
3031 if(pass > 0 && (block->type&BLOCK_INTRA)){
3032 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3033 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3035 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
     // candidate search per reference frame
3039 for(ref=0; ref < s->ref_frames; ref++){
     // mvr points at this block's entry in the per-reference motion vector table, so mvr[k] addresses a neighbouring block's entry
3040 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
     // compares this reference's score with 3/2 of the score of the reference in ref_b (the controlled statement is not visible here)
3041 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
     // candidates: the stored vector for this block, the zero vector, and the stored vectors of the top, left, right and bottom neighbours
     // NOTE(review): the guards around the neighbour candidates are not visible here — confirm
3046 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3047 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3049 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3051 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3053 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3055 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3058 //FIXME avoid subpel interpol / round to nearest integer
     // diamond search: for each size i up to dia_size (at least 1), points at offsets of 4*(i-j) and 4*j with all four sign combinations
3061 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3063 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3064 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3065 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3066 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
     // refinement: the eight neighbours one unit away from the current vector
3072 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3075 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3077 //FIXME or try the standard 2 pass qpel or similar
     // store the vector found for this reference in the table
3079 mvr[0][0]= block->mx;
3080 mvr[0][1]= block->my;
3081 if(ref_rd > best_rd){
     // finally try the intra candidate built from the get_dc() colours
3089 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3090 //FIXME RD style color selection
     // the block changed: clear BLOCK_OPT on every existing neighbour
3092 if(!same_block(block, &backup)){
3093 if(tb ) tb ->type &= ~BLOCK_OPT;
3094 if(lb ) lb ->type &= ~BLOCK_OPT;
3095 if(rb ) rb ->type &= ~BLOCK_OPT;
3096 if(bb ) bb ->type &= ~BLOCK_OPT;
3097 if(tlb) tlb->type &= ~BLOCK_OPT;
3098 if(trb) trb->type &= ~BLOCK_OPT;
3099 if(blb) blb->type &= ~BLOCK_OPT;
3100 if(brb) brb->type &= ~BLOCK_OPT;
3105 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
     // 4mv pass: visit each 2x2 group of blocks, only when the block tree has one extra depth level
3110 if(s->block_max_depth == 1){
3112 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3113 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3115 int best_rd, init_rd;
3116 const int index= mb_x + mb_y * b_stride;
3119 b[0]= &s->block[index];
3121 b[2]= b[0]+b_stride;
     // tests whether all four blocks of the group are already identical (the controlled statement is not visible here)
3123 if(same_block(b[0], b[1]) &&
3124 same_block(b[0], b[2]) &&
3125 same_block(b[0], b[3]))
3128 if(!s->me_cache_generation)
3129 memset(s->me_cache, 0, sizeof(s->me_cache));
3130 s->me_cache_generation += 1<<22;
     // score of the group as it currently is
3132 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3134 //FIXME more multiref search?
     // candidate 1: rounded average of the four motion vectors with reference 0
3135 check_4block_inter(s, mb_x, mb_y,
3136 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3137 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
     // further candidates: the parameters of each non-intra member of the group
3140 if(!(b[i]->type&BLOCK_INTRA))
3141 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
     // compares the score before and after the candidates (the controlled statement is not visible here)
3143 if(init_rd != best_rd)
3147 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantize the coefficients of one subband from src into dst.
 *
 * The quantizer index is s->qlog + b->qlog clipped to [0, QROOT*16]. The
 * divisor qmul is the qexp[] entry selected by the low bits of that index,
 * left-shifted by (qlog>>QSHIFT) + ENCODER_EXTRA_BITS.
 * When s->qlog equals LOSSLESS_QLOG the coefficients are copied unchanged.
 *
 * @param bias non-zero replaces the rounding offset by 0; zero selects an
 *             offset of 3/8 of qmul
 */
3151 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3152 const int level= b->level;
3153 const int w= b->width;
3154 const int h= b->height;
3155 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3156 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3157 int x,y, thres1, thres2;
// lossless mode: plain copy, no division
3160 if(s->qlog == LOSSLESS_QLOG){
3163 dst[x + y*stride]= src[x + y*stride];
// note the inversion: a non-zero bias argument yields a rounding offset of 0
3167 bias= bias ? 0 : (3*qmul)>>3;
3168 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// variant that divides by qmul without adding the rounding offset
3174 int i= src[x + y*stride];
// threshold test; coefficients that fail it are presumably the ones stored as 0 below
3176 if((unsigned)(i+thres1) > thres2){
3179 i/= qmul; //FIXME optimize
3180 dst[x + y*stride]= i;
3184 i/= qmul; //FIXME optimize
3185 dst[x + y*stride]= -i;
3188 dst[x + y*stride]= 0;
// variant that adds the rounding offset before dividing
3194 int i= src[x + y*stride];
3196 if((unsigned)(i+thres1) > thres2){
3199 i= (i + bias) / qmul; //FIXME optimize
3200 dst[x + y*stride]= i;
3204 i= (i + bias) / qmul; //FIXME optimize
3205 dst[x + y*stride]= -i;
3208 dst[x + y*stride]= 0;
3212 if(level+1 == s->spatial_decomposition_count){
3213 // STOP_TIMER("quantize")
/**
 * Dequantize rows start_y (inclusive) to end_y (exclusive) of subband b,
 * operating on lines fetched from the slice buffer sb.
 *
 * qmul is derived from the clipped sum s->qlog + b->qlog via qexp[];
 * qadd is (s->qbias*qmul)>>QBIAS_SHIFT. Nothing is done in lossless mode.
 */
3217 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3218 const int w= b->width;
3219 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3220 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3221 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3225 if(s->qlog == LOSSLESS_QLOG) return;
3227 for(y=start_y; y<end_y; y++){
3228 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// the band's row y lives at slice-buffer line y*stride_line + buf_y_offset, starting at column buf_x_offset
3229 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// the two formulas below are mirror images of each other; presumably selected by the sign of i
3233 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3235 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3239 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3240 STOP_TIMER("dquant")
/**
 * Dequantize subband b in place in src (row pitch stride).
 *
 * Uses the same qmul / qadd derivation as dequantize_slice_buffered():
 * qmul from qexp[] and the clipped s->qlog + b->qlog, qadd from s->qbias.
 * Nothing is done in lossless mode.
 */
3244 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3245 const int w= b->width;
3246 const int h= b->height;
3247 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3248 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3249 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3253 if(s->qlog == LOSSLESS_QLOG) return;
3257 int i= src[x + y*stride];
// mirror-image formulas; presumably selected by the sign of i
3259 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3261 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3265 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3266 STOP_TIMER("dquant")
/**
 * Subtract a spatial prediction from every coefficient of subband b, in
 * place in src.
 *
 * The band is walked from the last row/column back to the first, so each
 * prediction is formed from neighbours that have not been modified yet.
 * Visible predictors: median of left, top and top-right; median of left,
 * top and (left + top - top-left); the left neighbour; the top neighbour.
 * Which predictor applies presumably depends on use_median and on the
 * position within the band.
 */
3270 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3271 const int w= b->width;
3272 const int h= b->height;
3275 for(y=h-1; y>=0; y--){
3276 for(x=w-1; x>=0; x--){
3277 int i= x + y*stride;
// median of left / top / top-right, falling back to the left neighbour on the first row or last column
3281 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3282 else src[i] -= src[i - 1];
// median of left / top / gradient (left + top - top-left), left neighbour on the first row
3284 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3285 else src[i] -= src[i - 1];
// prediction from the top neighbour only
3288 if(y) src[i] -= src[i - stride];
/**
 * Add the spatial prediction back to rows start_y (inclusive) to end_y
 * (exclusive) of subband b, working on lines of the slice buffer sb.
 *
 * Applies the same predictors as decorrelate(), with += instead of -=.
 * The row above the current one is accessed as prev.
 */
3294 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3295 const int w= b->width;
3300 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// fetch the row just above start_y
3304 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3306 for(y=start_y; y<end_y; y++){
3308 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3309 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// median of left / top / top-right, or left neighbour only
3313 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3314 else line[x] += line[x - 1];
// median of left / top / gradient, or left neighbour only
3316 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3317 else line[x] += line[x - 1];
// prediction from the top neighbour only
3320 if(y) line[x] += prev[x];
3325 // STOP_TIMER("correlate")
/**
 * Add the spatial prediction back to every coefficient of subband b, in
 * place in src (row pitch stride).
 *
 * Uses the same predictors as decorrelate(), with += instead of -=.
 */
3328 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3329 const int w= b->width;
3330 const int h= b->height;
3335 int i= x + y*stride;
// median of left / top / top-right, or left neighbour only
3339 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3340 else src[i] += src[i - 1];
// median of left / top / gradient, or left neighbour only
3342 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3343 else src[i] += src[i - 1];
// prediction from the top neighbour only
3346 if(y) src[i] += src[i - stride];
/**
 * Write the frame header to the range coder s->c.
 *
 * The keyframe flag is coded first, with a local state array freshly set
 * to MID_STATE. All other fields use s->header_state: stream parameters
 * are written as absolute values, then the per-frame parameters are
 * written as signed differences against the s->last_* fields. Those
 * fields are updated at the end, so the next header is coded relative to
 * this one.
 */
3352 static void encode_header(SnowContext *s){
3353 int plane_index, level, orientation;
3356 memset(kstate, MID_STATE, sizeof(kstate));
3358 put_rac(&s->c, kstate, s->keyframe);
// on a keyframe or when always_reset is set, the s->last_* reference values are reset
3359 if(s->keyframe || s->always_reset){
3361 s->last_spatial_decomposition_type=
3365 s->last_block_max_depth= 0;
// absolute stream parameters (unsigned symbols and single-bit flags)
3368 put_symbol(&s->c, s->header_state, s->version, 0);
3369 put_rac(&s->c, s->header_state, s->always_reset);
3370 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3371 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3372 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3373 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3374 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3375 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3376 put_rac(&s->c, s->header_state, s->spatial_scalability);
3377 // put_rac(&s->c, s->header_state, s->rate_scalability);
3378 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// per-band qlog is written for planes 0 and 1 only, and never for orientation 2
3380 for(plane_index=0; plane_index<2; plane_index++){
3381 for(level=0; level<s->spatial_decomposition_count; level++){
3382 for(orientation=level ? 1:0; orientation<4; orientation++){
3383 if(orientation==2) continue;
3384 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// per-frame parameters, coded as signed differences from the previous header
3389 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3390 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3391 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3392 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3393 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// remember what was sent for the next header
3395 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3396 s->last_qlog = s->qlog;
3397 s->last_qbias = s->qbias;
3398 s->last_mv_scale = s->mv_scale;
3399 s->last_block_max_depth = s->block_max_depth;
/**
 * Read the frame header from the range coder s->c into s.
 *
 * Fields are read in the same order in which encode_header() writes them:
 * keyframe flag (local MID_STATE state), absolute stream parameters,
 * per-band qlog values, then signed deltas that are added onto the
 * current per-frame parameters.
 *
 * NOTE(review): the three av_log() format strings in this function have
 * no trailing newline, unlike the other av_log() calls in this file;
 * confirm whether that is intended.
 */
3402 static int decode_header(SnowContext *s){
3403 int plane_index, level, orientation;
3406 memset(kstate, MID_STATE, sizeof(kstate));
3408 s->keyframe= get_rac(&s->c, kstate);
// on a keyframe or when always_reset is set, the delta-coded parameters are reset first
3409 if(s->keyframe || s->always_reset){
3411 s->spatial_decomposition_type=
3415 s->block_max_depth= 0;
3418 s->version= get_symbol(&s->c, s->header_state, 0);
3420 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3423 s->always_reset= get_rac(&s->c, s->header_state);
3424 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3425 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3426 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3427 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3428 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3429 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3430 s->spatial_scalability= get_rac(&s->c, s->header_state);
3431 // s->rate_scalability= get_rac(&s->c, s->header_state);
3432 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// qlog per band: plane 2 reuses plane 1, orientation 2 reuses orientation 1, everything else is read from the stream
3434 for(plane_index=0; plane_index<3; plane_index++){
3435 for(level=0; level<s->spatial_decomposition_count; level++){
3436 for(orientation=level ? 1:0; orientation<4; orientation++){
3438 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3439 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3440 else q= get_symbol(&s->c, s->header_state, 1);
3441 s->plane[plane_index].band[level][orientation].qlog= q;
// per-frame parameters arrive as signed deltas
3447 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3448 if(s->spatial_decomposition_type > 1){
3449 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3453 s->qlog += get_symbol(&s->c, s->header_state, 1);
3454 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3455 s->qbias += get_symbol(&s->c, s->header_state, 1);
3456 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// NOTE(review): this test also catches negative values, for which the "too large" wording below is misleading
3457 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3458 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3459 s->block_max_depth= 0;
/**
 * Build the quantizer step table: the loop runs over QROOT entries and
 * multiplies v by 2^(1/QROOT) on every iteration, so consecutive entries
 * presumably grow geometrically and double after QROOT steps.
 */
3466 static void init_qexp(void){
3470 for(i=0; i<QROOT; i++){
3472 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation of the SnowContext stored in avctx->priv_data.
 *
 * Sets up the DSP function tables, fixes the decomposition count at 5 and
 * the chroma shifts at 1, allocates the two DWT work buffers, derives
 * mv_scale and block_max_depth from the codec flags, lays out every
 * subband of every plane inside the shared buffers, and fills the
 * scale_mv_ref[][] table.
 *
 * NOTE(review): in the visible code the results of av_mallocz() and
 * get_buffer() are not checked, and width*height*sizeof(...) is computed
 * without an overflow guard; confirm against the callers' constraints.
 */
3476 static int common_init(AVCodecContext *avctx){
3477 SnowContext *s = avctx->priv_data;
3479 int level, orientation, plane_index, dec;
3484 dsputil_init(&s->dsp, avctx);
3487 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3488 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3489 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3490 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3491 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3492 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh: point the 16- and 8-wide put_pixels / put_no_rnd_pixels slots at the mc_block_hpel<dx><dy><size> functions
3511 #define mcfh(dx,dy)\
3512 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3513 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3514 mc_block_hpel ## dx ## dy ## 16;\
3515 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3516 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3517 mc_block_hpel ## dx ## dy ## 8;
// fixed defaults: 5 decomposition levels, wavelet type taken from avctx->prediction_method
3527 dec= s->spatial_decomposition_count= 5;
3528 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3530 s->chroma_h_shift= 1; //FIXME XXX
3531 s->chroma_v_shift= 1;
3533 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3535 width= s->avctx->width;
3536 height= s->avctx->height;
// one IDWTELEM and one DWTELEM work buffer, each holding width*height elements
3538 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3539 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesn't belong here
// CODEC_FLAG_QPEL gives mv_scale 2, otherwise 4; CODEC_FLAG_4MV gives block_max_depth 1, otherwise 0
3541 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3542 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3544 for(plane_index=0; plane_index<3; plane_index++){
3545 int w= s->avctx->width;
3546 int h= s->avctx->height;
// plane dimensions reduced by the chroma shifts (presumably for the chroma planes only)
3549 w>>= s->chroma_h_shift;
3550 h>>= s->chroma_v_shift;
3552 s->plane[plane_index].width = w;
3553 s->plane[plane_index].height= h;
3554 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// subbands are set up from the highest level index down to level 0
3555 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3556 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3557 SubBand *b= &s->plane[plane_index].band[level][orientation];
3559 b->buf= s->spatial_dwt_buffer;
// stride / stride_line scale with 2^(decomposition_count - level)
3561 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3562 b->width = (w + !(orientation&1))>>1;
3563 b->height= (h + !(orientation>1))>>1;
3565 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3566 b->buf_x_offset = 0;
3567 b->buf_y_offset = 0;
3571 b->buf_x_offset = (w+1)>>1;
3574 b->buf += b->stride>>1;
3575 b->buf_y_offset = b->stride_line >> 1;
// ibuf addresses the same element offset inside the IDWTELEM buffer
3577 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3580 b->parent= &s->plane[plane_index].band[level-1][orientation];
3581 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// scale_mv_ref[i][j] = 256*(i+1)/(j+1), integer division
3588 for(i=0; i<MAX_REF_FRAMES; i++)
3589 for(j=0; j<MAX_REF_FRAMES; j++)
3590 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3594 width= s->width= avctx->width;
3595 height= s->height= avctx->height;
3597 assert(width && height);
3599 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Convert a qscale value to a qlog value:
 * rint(QROOT * log2(qscale / FF_QP2LAMBDA)) + 61*QROOT/8.
 * The division by FF_QP2LAMBDA is done in float.
 */
3604 static int qscale2qlog(int qscale){
3605 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3606 + 61*QROOT/8; //<64 >60
/**
 * Single-pass rate control step: estimate the complexity of the current
 * frame from the plane 0 subbands, ask the rate controller for a quality
 * and update pict->quality, s->lambda and s->qlog from it.
 *
 * Side effect: the ibuf of every plane 0 subband is overwritten with a
 * decorrelated copy of b->buf.
 *
 * Presumably returns the applied qlog change; encode_frame() compares the
 * result against INT_MIN, which suggests that value signals failure.
 */
3609 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3611 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3612 * FIXME we know exact mv bits at this point,
3613 * but ratecontrol isn't set up to include them. */
3614 uint32_t coef_sum= 0;
3615 int level, orientation, delta_qlog;
3617 for(level=0; level<s->spatial_decomposition_count; level++){
3618 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3619 SubBand *b= &s->plane[0].band[level][orientation];
3620 IDWTELEM *buf= b->ibuf;
3621 const int w= b->width;
3622 const int h= b->height;
3623 const int stride= b->stride;
// weight derived from a fixed base of 2*QROOT plus the band's own qlog, independent of s->qlog
3624 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3625 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3626 const int qdiv= (1<<16)/qmul;
3628 //FIXME this is ugly
3631 buf[x+y*stride]= b->buf[x+y*stride];
3633 decorrelate(s, b, buf, stride, 1, 0);
// 16.16 fixed-point multiply by 1/qmul
3636 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3640 /* ugly, ratecontrol just takes a sqrt again */
3641 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3642 assert(coef_sum < INT_MAX);
// I frames report the sum as mb_var_sum, other frames as mc_mb_var_sum; the other field is zeroed
3644 if(pict->pict_type == I_TYPE){
3645 s->m.current_picture.mb_var_sum= coef_sum;
3646 s->m.current_picture.mc_mb_var_sum= 0;
3648 s->m.current_picture.mc_mb_var_sum= coef_sum;
3649 s->m.current_picture.mb_var_sum= 0;
3652 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3653 if (pict->quality < 0)
3655 s->lambda= pict->quality * 3/2;
3656 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3657 s->qlog+= delta_qlog;
/**
 * Derive b->qlog for every subband of plane p from the response of the
 * inverse DWT to a single impulse placed in that subband.
 *
 * For each band: clear the IDWT buffer, set the coefficient at the band
 * centre to 256*16, run ff_spatial_idwt(), and scan the reconstructed
 * samples (scaled by 16) to accumulate an error measure. The qlog is then
 * log(352256 / sqrt(error)) expressed in base 2^(1/QROOT), rounded.
 *
 * Side effect: s->spatial_idwt_buffer is overwritten.
 */
3661 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3662 int width = p->width;
3663 int height= p->height;
3664 int level, orientation, x, y;
3666 for(level=0; level<s->spatial_decomposition_count; level++){
3667 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3668 SubBand *b= &p->band[level][orientation];
3669 IDWTELEM *ibuf= b->ibuf;
// single impulse in the middle of this band, everything else zero
3672 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3673 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3674 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3675 for(y=0; y<height; y++){
3676 for(x=0; x<width; x++){
3677 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// +0.5 before the int cast rounds to nearest for positive values
3682 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3683 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation.
 *
 * Rejects use unless strict_std_compliance allows experimental codecs,
 * rejects the 9/7 wavelet combined with lossless settings, allocates the
 * motion estimation scratch areas, sets up rate control where needed,
 * computes the per-band visual weights and accepts PIX_FMT_YUV420P only.
 *
 * NOTE(review): none of the av_mallocz() results visible here are checked
 * before use, and neither is the get_buffer() call for s->input_picture.
 */
3688 static int encode_init(AVCodecContext *avctx)
3690 SnowContext *s = avctx->priv_data;
3693 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3694 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3695 "use vstrict=-2 / -strict -2 to use it anyway\n");
// fixed qscale with global_quality 0 means lossless, which the 9/7 wavelet cannot provide
3699 if(avctx->prediction_method == DWT_97
3700 && (avctx->flags & CODEC_FLAG_QSCALE)
3701 && avctx->global_quality == 0){
3702 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3712 s->m.flags = avctx->flags;
3713 s->m.bit_rate= avctx->bit_rate;
// scratch memory for the embedded MpegEncContext's motion estimation
3715 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3716 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3717 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3718 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3719 h263_encode_init(&s->m); //mv_penalty
// clamp the requested reference count to [1, MAX_REF_FRAMES]
3721 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3723 if(avctx->flags&CODEC_FLAG_PASS1){
3724 if(!avctx->stats_out)
3725 avctx->stats_out = av_mallocz(256);
// rate control is needed for the second pass and whenever no fixed qscale is requested
3727 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3728 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc: neither fixed qscale nor second pass
3731 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3733 for(plane_index=0; plane_index<3; plane_index++){
3734 calculate_vissual_weight(s, &s->plane[plane_index]);
3738 avctx->coded_frame= &s->current_picture;
// only YUV420P is enabled; the other formats are commented out
3739 switch(avctx->pix_fmt){
3740 // case PIX_FMT_YUV444P:
3741 // case PIX_FMT_YUV422P:
3742 case PIX_FMT_YUV420P:
3744 // case PIX_FMT_YUV411P:
3745 // case PIX_FMT_YUV410P:
3746 s->colorspace_type= 0;
3748 /* case PIX_FMT_RGB32:
3752 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3755 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3756 s->chroma_h_shift= 1;
3757 s->chroma_v_shift= 1;
3759 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3760 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3762 s->avctx->get_buffer(s->avctx, &s->input_picture);
// iterative motion estimation keeps per-reference motion vectors and scores for every block
3764 if(s->avctx->me_method == ME_ITER){
3766 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3767 for(i=0; i<s->max_ref_frames; i++){
3768 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3769 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
/**
 * Prepare the picture buffers for a new frame.
 *
 * Pads the edges of the current picture (if it has data), rotates the
 * reference list so that the current picture becomes last_picture[0] and
 * the oldest entry becomes the new current picture, then requests a
 * buffer for it through avctx->get_buffer().
 * A get_buffer() failure is logged; the value returned after that is not
 * visible here (presumably negative).
 */
3776 static int frame_start(SnowContext *s){
3778 int w= s->avctx->width; //FIXME round up to x16 ?
3779 int h= s->avctx->height;
3781 if(s->current_picture.data[0]){
// planes 1 and 2 use half the dimensions and half the edge width
3782 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3783 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3784 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// rotate: oldest reference -> current, current -> newest reference
3787 tmp= s->last_picture[s->max_ref_frames-1];
3788 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
3789 s->last_picture[0]= s->current_picture;
3790 s->current_picture= tmp;
// walk the references that hold data; the test below looks for a keyframe among the newer entries
3796 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
3797 if(i && s->last_picture[i-1].key_frame)
3802 s->current_picture.reference= 1;
3803 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3804 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3808 s->current_picture.key_frame= s->keyframe;
/**
 * Encode one picture.
 *
 * @param buf      output buffer given to the range encoder
 * @param buf_size size of buf
 * @param data     the input picture, used as an AVFrame
 * @return on the visible exit path, the result of ff_rac_terminate()
 *
 * Visible stages: copy the input into s->input_picture, choose the frame
 * type and quality, configure the embedded s->m context for P frames,
 * code the block data, then for each plane transform, quantize and code
 * the subbands and reconstruct the plane into the current picture.
 * Finally the bit statistics are updated.
 */
3813 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3814 SnowContext *s = avctx->priv_data;
3815 RangeCoder * const c= &s->c;
3816 AVFrame *pict = data;
3817 const int width= s->avctx->width;
3818 const int height= s->avctx->height;
3819 int level, orientation, plane_index, i, y;
3820 uint8_t rc_header_bak[sizeof(s->header_state)];
3821 uint8_t rc_block_bak[sizeof(s->block_state)];
3823 ff_init_range_encoder(c, buf, buf_size);
3824 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// copy the caller's picture row by row into s->input_picture
3828 for(y=0; y<(height>>shift); y++)
3829 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3830 &pict->data[i][y * pict->linesize[i]],
3833 s->new_picture = *pict;
3835 s->m.picture_number= avctx->frame_number;
// second pass: frame type comes from the rate control entries; otherwise from the GOP position
3836 if(avctx->flags&CODEC_FLAG_PASS2){
3838 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3839 s->keyframe= pict->pict_type==FF_I_TYPE;
3840 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3841 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3842 if (pict->quality < 0)
// gop_size 0 makes every frame a keyframe
3846 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3848 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// single-pass rate control has no estimate for the very first frame yet
3851 if(s->pass1_rc && avctx->frame_number == 0)
3852 pict->quality= 2*FF_QP2LAMBDA;
3854 s->qlog= qscale2qlog(pict->quality);
3855 s->lambda = pict->quality * 3/2;
// a negative qlog, or quality 0 together with CODEC_FLAG_QSCALE, selects lossless coding
3857 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3858 s->qlog= LOSSLESS_QLOG;
3860 }//else keep previous frame's qlog until after motion est
3864 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: fill in the embedded s->m context from the current frame state
3865 if(pict->pict_type == P_TYPE){
3866 int block_width = (width +15)>>4;
3867 int block_height= (height+15)>>4;
3868 int stride= s->current_picture.linesize[0];
3870 assert(s->current_picture.data[0]);
3871 assert(s->last_picture[0].data[0]);
3873 s->m.avctx= s->avctx;
3874 s->m.current_picture.data[0]= s->current_picture.data[0];
3875 s->m. last_picture.data[0]= s->last_picture[0].data[0];
3876 s->m. new_picture.data[0]= s-> input_picture.data[0];
3877 s->m. last_picture_ptr= &s->m. last_picture;
3879 s->m. last_picture.linesize[0]=
3880 s->m. new_picture.linesize[0]=
3881 s->m.current_picture.linesize[0]= stride;
3882 s->m.uvlinesize= s->current_picture.linesize[1];
3884 s->m.height= height;
3885 s->m.mb_width = block_width;
3886 s->m.mb_height= block_height;
3887 s->m.mb_stride= s->m.mb_width+1;
3888 s->m.b8_stride= 2*s->m.mb_width+1;
3890 s->m.pict_type= pict->pict_type;
3891 s->m.me_method= s->avctx->me_method;
3892 s->m.me.scene_change_score=0;
3893 s->m.flags= s->avctx->flags;
3894 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3895 s->m.out_format= FMT_H263;
3896 s->m.unrestricted_mv= 1;
3898 s->m.lambda = s->lambda;
3899 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3900 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3902 s->m.dsp= s->dsp; //move
// keep copies of the coder states; they are restored further down when the data is coded a second time
3908 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3909 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3914 s->m.pict_type = pict->pict_type;
3915 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// bits produced so far count as misc_bits, the block data that follows as mv_bits
3918 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3919 encode_blocks(s, 1);
3920 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3922 for(plane_index=0; plane_index<3; plane_index++){
3923 Plane *p= &s->plane[plane_index];
3927 // int bits= put_bits_count(&s->c.pb);
3929 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3931 if(pict->data[plane_index]) //FIXME gray hack
// load the source pixels, scaled up by FRAC_BITS
3934 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3937 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3940 && pict->pict_type == P_TYPE
3941 && !(avctx->flags&CODEC_FLAG_PASS2)
3942 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
// scene change on a P frame: restart the range encoder and code the frame as an I frame
3943 ff_init_range_encoder(c, buf, buf_size);
3944 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3945 pict->pict_type= FF_I_TYPE;
3947 s->current_picture.key_frame=1;
// lossless: round the FRAC_BITS fraction away; otherwise scale up by ENCODER_EXTRA_BITS
3951 if(s->qlog == LOSSLESS_QLOG){
3954 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3960 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3965 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// single-pass rate control chooses qlog from the plane 0 coefficients, so the encoder is rewound and the block data coded again
3967 if(s->pass1_rc && plane_index==0){
3968 int delta_qlog = ratecontrol_1pass(s, pict);
3969 if (delta_qlog <= INT_MIN)
3972 //reordering qlog in the bitstream would eliminate this reset
3973 ff_init_range_encoder(c, buf, buf_size);
3974 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3975 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3977 encode_blocks(s, 0);
// per subband: quantize into ibuf, decorrelate, entropy-code, then correlate() to restore ibuf
3981 for(level=0; level<s->spatial_decomposition_count; level++){
3982 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3983 SubBand *b= &p->band[level][orientation];
3985 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3987 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
3988 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3989 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3991 correlate(s, b, b->ibuf, b->stride, 1, 0);
3994 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// reconstruction: dequantize, inverse transform, add the prediction back
3996 for(level=0; level<s->spatial_decomposition_count; level++){
3997 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3998 SubBand *b= &p->band[level][orientation];
4000 dequantize(s, b, b->ibuf, b->stride);
4004 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4005 if(s->qlog == LOSSLESS_QLOG){
4008 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4013 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4014 STOP_TIMER("pred-conv")}
// branch taken when the subband coding above is skipped: I frames copy the input, other frames use the prediction only
4017 if(pict->pict_type == I_TYPE){
4020 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4021 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4025 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4026 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// optional PSNR bookkeeping: compare the reconstruction with the input
4029 if(s->avctx->flags&CODEC_FLAG_PSNR){
4032 if(pict->data[plane_index]) //FIXME gray hack
4035 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4039 s->avctx->error[plane_index] += error;
4040 s->current_picture.error[plane_index] = error;
// the oldest reference is no longer needed
4044 if(s->last_picture[s->max_ref_frames-1].data[0])
4045 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// statistics: p_tex_bits is what remains after misc and mv bits
4047 s->current_picture.coded_picture_number = avctx->frame_number;
4048 s->current_picture.pict_type = pict->pict_type;
4049 s->current_picture.quality = pict->quality;
4050 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4051 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4052 s->m.current_picture.display_picture_number =
4053 s->m.current_picture.coded_picture_number = avctx->frame_number;
4054 s->m.current_picture.quality = pict->quality;
4055 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4057 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4059 if(avctx->flags&CODEC_FLAG_PASS1)
4060 ff_write_pass1_stats(&s->m);
4061 s->m.last_pict_type = s->m.pict_type;
4062 avctx->frame_bits = s->m.frame_bits;
4063 avctx->mv_bits = s->m.mv_bits;
4064 avctx->misc_bits = s->m.misc_bits;
4065 avctx->p_tex_bits = s->m.p_tex_bits;
4069 return ff_rac_terminate(c);
/**
 * Release the buffers held by the context: DWT work buffers, motion
 * estimation scratch areas, the block array, per-reference motion data,
 * any reference pictures that still hold data, and each subband's x_coeff
 * array. av_freep() is used throughout, so the pointers are cleared too.
 */
4072 static void common_end(SnowContext *s){
4073 int plane_index, level, orientation, i;
4075 av_freep(&s->spatial_dwt_buffer);
4076 av_freep(&s->spatial_idwt_buffer);
4078 av_freep(&s->m.me.scratchpad);
4079 av_freep(&s->m.me.map);
4080 av_freep(&s->m.me.score_map);
4081 av_freep(&s->m.obmc_scratchpad);
4083 av_freep(&s->block);
// all MAX_REF_FRAMES slots are visited, not just max_ref_frames
4085 for(i=0; i<MAX_REF_FRAMES; i++){
4086 av_freep(&s->ref_mvs[i]);
4087 av_freep(&s->ref_scores[i]);
4088 if(s->last_picture[i].data[0])
4089 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4092 for(plane_index=0; plane_index<3; plane_index++){
4093 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4094 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4095 SubBand *b= &s->plane[plane_index].band[level][orientation];
4097 av_freep(&b->x_coeff);
/**
 * Encoder teardown; the visible part frees avctx->stats_out.
 * av_free() leaves the pointer itself unchanged.
 */
4103 static int encode_end(AVCodecContext *avctx)
4105 SnowContext *s = avctx->priv_data;
4108 av_free(avctx->stats_out);
/**
 * Decoder initialisation: the output format is fixed to PIX_FMT_YUV420P
 * and the slice buffer s->sb is created on top of s->spatial_idwt_buffer,
 * sized from plane 0.
 */
4113 static int decode_init(AVCodecContext *avctx)
4115 SnowContext *s = avctx->priv_data;
4118 avctx->pix_fmt= PIX_FMT_YUV420P;
4122 block_size = MB_SIZE >> s->block_max_depth;
// second argument: block_size + count*(count+3) + 1, with count = spatial_decomposition_count
4123 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer);
/**
 * Decode one frame from buf (buf_size bytes) into *data, an AVFrame.
 *
 * Each plane is reconstructed slice by slice: the subband rows needed for
 * a slice are decoded and dequantized into the slice buffer, the inverse
 * DWT runs on the available lines, the block prediction is added, and
 * finished lines are released.
 *
 * *data receives s->current_picture, or s->mconly_picture when debug bit
 * 2048 is set; *data_size is set to sizeof(AVFrame).
 */
4128 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4129 SnowContext *s = avctx->priv_data;
4130 RangeCoder * const c= &s->c;
4132 AVFrame *picture = data;
4133 int level, orientation, plane_index;
4135 ff_init_range_decoder(c, buf, buf_size);
4136 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4138 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// the block array is allocated on first use
4140 if(!s->block) alloc_blocks(s);
4143 //keyframe flag duplication mess FIXME
4144 if(avctx->debug&FF_DEBUG_PICT_INFO)
4145 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4149 for(plane_index=0; plane_index<3; plane_index++){
4150 Plane *p= &s->plane[plane_index];
4154 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug bit 2048: build a prediction-only picture in s->mconly_picture
4156 if(s->avctx->debug&2048){
4157 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4158 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4162 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4163 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// unpack the coefficients of every subband of this plane
4169 for(level=0; level<s->spatial_decomposition_count; level++){
4170 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4171 SubBand *b= &p->band[level][orientation];
4172 unpack_coeffs(s, b, b->parent, orientation);
4175 STOP_TIMER("unpack coeffs");
4179 const int mb_h= s->b_height << s->block_max_depth;
4180 const int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block size
4181 const int block_w = plane_index ? block_size/2 : block_size;
4183 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4188 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// note the inclusive bound: one more iteration than there are block rows
4189 for(mb_y=0; mb_y<=mb_h; mb_y++){
4191 int slice_starty = block_w*mb_y;
4192 int slice_h = block_w*(mb_y+1);
// unless this is a keyframe (or debug bit 512 is set), the slice window is moved up by half a block
4193 if (!(s->keyframe || s->avctx->debug&512)){
4194 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4195 slice_h -= (block_w >> 1);
4200 for(level=0; level<s->spatial_decomposition_count; level++){
4201 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4202 SubBand *b= &p->band[level][orientation];
4205 int our_mb_start = mb_y;
4206 int our_mb_end = (mb_y + 1);
// map the slice's row range to rows of this subband, then clamp to the band height
4208 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4209 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4210 if (!(s->keyframe || s->avctx->debug&512)){
4211 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4212 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4214 start_y = FFMIN(b->height, start_y);
4215 end_y = FFMIN(b->height, end_y);
4217 if (start_y != end_y){
// orientation 0 uses band[0][0]: decode and correlate one row ahead of the rows being dequantized
4218 if (orientation == 0){
4219 SubBand * correlate_band = &p->band[0][0];
4220 int correlate_end_y = FFMIN(b->height, end_y + 1);
4221 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4222 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4223 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4224 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4227 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4231 STOP_TIMER("decode_subband_slice");
// inverse DWT advances in steps of 4 lines up to the end of the slice
4235 for(; yd<slice_h; yd+=4){
4236 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4238 STOP_TIMER("idwt slice");}
// lossless: scale the reconstructed lines up by FRAC_BITS
4241 if(s->qlog == LOSSLESS_QLOG){
4242 for(; yq<slice_h && yq<h; yq++){
4243 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4245 line[x] <<= FRAC_BITS;
4250 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// release the slice-buffer lines of this slice, clamped to the plane height
4252 y = FFMIN(p->height, slice_starty);
4253 end_y = FFMIN(p->height, slice_h);
4255 slice_buffer_release(&s->sb, y++);
4258 slice_buffer_flush(&s->sb);
4260 STOP_TIMER("idwt + predict_slices")}
// the oldest reference is no longer needed
4265 if(s->last_picture[s->max_ref_frames-1].data[0])
4266 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4268 if(!(s->avctx->debug&2048))
4269 *picture= s->current_picture;
4271 *picture= s->mconly_picture;
4273 *data_size = sizeof(AVFrame);
// bytes consumed by the range decoder; zero is reported as an error
4275 bytes_read= c->bytestream - c->bytestream_start;
4276 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown; the visible part destroys the slice buffer s->sb.
 */
4281 static int decode_end(AVCodecContext *avctx)
4283 SnowContext *s = avctx->priv_data;
4285 slice_buffer_destroy(&s->sb);
/**
 * Codec table entry for the Snow decoder, written with positional
 * initializers. sizeof(SnowContext) is presumably the private context
 * size. The literal 0 leaves CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND
 * disabled (both are commented out).
 */
4292 AVCodec snow_decoder = {
4296 sizeof(SnowContext),
4301 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* Codec table entry for the Snow encoder; only compiled when
 * CONFIG_SNOW_ENCODER is defined. sizeof(SnowContext) is presumably the
 * private context size. */
4305 #ifdef CONFIG_SNOW_ENCODER
4306 AVCodec snow_encoder = {
4310 sizeof(SnowContext),
4327 int buffer[2][width*height];
4330 s.spatial_decomposition_count=6;
4331 s.spatial_decomposition_type=1;
4333 printf("testing 5/3 DWT\n");
4334 for(i=0; i<width*height; i++)
4335 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4337 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4338 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4340 for(i=0; i<width*height; i++)
4341 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4343 printf("testing 9/7 DWT\n");
4344 s.spatial_decomposition_type=0;
4345 for(i=0; i<width*height; i++)
4346 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4348 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4349 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4351 for(i=0; i<width*height; i++)
4352 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4355 printf("testing AC coder\n");
4356 memset(s.header_state, 0, sizeof(s.header_state));
4357 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4358 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4360 for(i=-256; i<256; i++){
4362 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4363 STOP_TIMER("put_symbol")
4365 ff_rac_terminate(&s.c);
4367 memset(s.header_state, 0, sizeof(s.header_state));
4368 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4369 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4371 for(i=-256; i<256; i++){
4374 j= get_symbol(&s.c, s.header_state, 1);
4375 STOP_TIMER("get_symbol")
4376 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4380 int level, orientation, x, y;
4381 int64_t errors[8][4];
4384 memset(errors, 0, sizeof(errors));
4385 s.spatial_decomposition_count=3;
4386 s.spatial_decomposition_type=0;
4387 for(level=0; level<s.spatial_decomposition_count; level++){
4388 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4389 int w= width >> (s.spatial_decomposition_count-level);
4390 int h= height >> (s.spatial_decomposition_count-level);
4391 int stride= width << (s.spatial_decomposition_count-level);
4392 DWTELEM *buf= buffer[0];
4395 if(orientation&1) buf+=w;
4396 if(orientation>1) buf+=stride>>1;
4398 memset(buffer[0], 0, sizeof(int)*width*height);
4399 buf[w/2 + h/2*stride]= 256*256;
4400 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4401 for(y=0; y<height; y++){
4402 for(x=0; x<width; x++){
4403 int64_t d= buffer[0][x + y*width];
4405 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4407 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4409 error= (int)(sqrt(error)+0.5);
4410 errors[level][orientation]= error;
4411 if(g) g=ff_gcd(g, error);
4415 printf("static int const visual_weight[][4]={\n");
4416 for(level=0; level<s.spatial_decomposition_count; level++){
4418 for(orientation=0; orientation<4; orientation++){
4419 printf("%8"PRId64",", errors[level][orientation]/g);
4427 int w= width >> (s.spatial_decomposition_count-level);
4428 int h= height >> (s.spatial_decomposition_count-level);
4429 int stride= width << (s.spatial_decomposition_count-level);
4430 DWTELEM *buf= buffer[0];
4436 memset(buffer[0], 0, sizeof(int)*width*height);
4438 for(y=0; y<height; y++){
4439 for(x=0; x<width; x++){
4440 int tab[4]={0,2,3,1};
4441 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4444 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4448 buf[x + y*stride ]=169;
4449 buf[x + y*stride-w]=64;
4452 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4454 for(y=0; y<height; y++){
4455 for(x=0; x<width; x++){
4456 int64_t d= buffer[0][x + y*width];
4458 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4460 if(FFABS(height/2-y)<9) printf("\n");