2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/* Three-level quantizer lookup: maps a byte index to -1, 0 or +1.
 * Indices 0 and 1 give 0, 2..127 give +1, 128..254 give -1 and 255 gives 0.
 * NOTE(review): the negative levels sit in the upper half of the table, so
 * the index is presumably a signed value reduced modulo 256 -- confirm at
 * the use site. */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* Three-level lookup without a dead zone: index 0 gives 0, 1..127 give +1
 * and 128..255 give -1 (differs from quant3 only at indices 1 and 255).
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* Three-level lookup with the sign interleaved by parity: indices 0 and 1
 * give 0; from index 2 on, even indices give +1 and odd indices give -1.
 * NOTE(review): this layout suggests the index carries the sign in its low
 * bit -- confirm against the caller. */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* Five-level quantizer lookup with outputs in -2..2: index 0 gives 0,
 * 1..3 give 1, 4..127 give 2, 128..252 give -2 and 253..255 give -1.
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* Seven-level quantizer lookup with outputs in -3..3. The lower half of the
 * table (indices 0..127) holds the non-negative levels, growing with the
 * index; the upper half (128..255) holds the negative levels, shrinking in
 * magnitude towards index 255.
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* Nine-level quantizer lookup with outputs in -4..4. Indices 0..127 hold the
 * non-negative levels and 128..255 the negative ones, with the smallest
 * magnitudes next to index 0 and index 255.
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* Eleven-level quantizer lookup with outputs in -5..5. Indices 0..127 hold
 * the non-negative levels and 128..255 the negative ones, with the smallest
 * magnitudes next to index 0 and index 255.
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* Thirteen-level quantizer lookup with outputs in -6..6. Indices 0..127 hold
 * the non-negative levels and 128..255 the negative ones, with the smallest
 * magnitudes next to index 0 and index 255.
 * NOTE(review): index is presumably a signed value modulo 256 -- confirm. */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* 32x32 window table (1024 entries laid out as 32 rows of 32). The outer
 * ring is zero and the values rise to a peak of 64 in the centre; rows and
 * columns are mirror-symmetric. The name suggests overlapped block motion
 * compensation weights.
 * NOTE(review): obmc32 is defined more than once in this file; the
 * preprocessor conditionals that select a single variant are not visible
 * here -- confirm which one is compiled. */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16x16 window table (256 entries, 16 rows of 16), mirror-symmetric with a
 * peak of 62 in the centre; companion of the 32x32 table with peak 64.
 * NOTE(review): obmc16 is defined more than once in this file; the
 * preprocessor conditionals that select a single variant are not visible
 * here -- confirm which one is compiled. */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* Alternative 32x32 window table (32 rows of 32). Every entry is a multiple
 * of 4, the corners are zero and the centre peaks at 240; rows and columns
 * are mirror-symmetric. Its scale matches obmc8 (also multiples of 4).
 * NOTE(review): this redefines obmc32; the preprocessor conditionals that
 * select a single variant are not visible here -- confirm which one is
 * compiled. */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* Alternative 16x16 window table (16 rows of 16). Every entry is a multiple
 * of 4, the corners are zero and the centre peaks at 224; rows and columns
 * are mirror-symmetric.
 * NOTE(review): this redefines obmc16; the preprocessor conditionals that
 * select a single variant are not visible here -- confirm which one is
 * compiled. */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* Third 32x32 window table (32 rows of 32): zero outer ring, mirror-symmetric,
 * centre peak of 64. The profile differs slightly from the first peak-64
 * variant (e.g. the row next to the centre peaks at 63 here).
 * NOTE(review): this redefines obmc32; the preprocessor conditionals that
 * select a single variant are not visible here -- confirm which one is
 * compiled. */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* Third 16x16 window table (16 rows of 16): mirror-symmetric with a centre
 * peak of 63; companion of the third 32x32 table.
 * NOTE(review): this redefines obmc16; the preprocessor conditionals that
 * select a single variant are not visible here -- confirm which one is
 * compiled. */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* 8x8 window table (64 entries, 8 rows of 8). The table is separable: the
 * entry at (row, col) equals p[row]*p[col] with p = {2,6,10,14,14,10,6,2},
 * giving a corner value of 4 and a centre peak of 196. */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
/* Smallest window table, 16 entries (presumably a 4x4 layout, by analogy
 * with the larger obmc tables -- confirm against the initializer). */
358 static const uint8_t obmc4[16]={
/* Window tables ordered from largest to smallest: index 0 is obmc32,
 * 1 is obmc16, 2 is obmc8 and 3 is obmc4. */
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
/* Square table indexed by two reference-frame numbers.
 * NOTE(review): presumably holds motion-vector scale factors between
 * reference frames -- confirm where it is filled and read. */
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* Per-block record. BLOCK_INTRA (value 1) is a flag constant declared next
 * to the fields; the commented-out TYPE_* defines are disabled alternatives.
 * NOTE(review): BLOCK_INTRA presumably applies to a "type" field (see the
 * FIXME on level) -- confirm against the full field list. */
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
/* Constant default BlockNode whose colour components are all 128.
 * NOTE(review): presumably used for positions outside the block grid (see
 * the FIXME about a border) -- confirm at the use sites. */
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
/* Block size expressed as a power of two: MB_SIZE evaluates to 1<<4 = 16. */
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* Record type used by SubBand::x_coeff.
 * NOTE(review): by its name it pairs an x position with a coefficient --
 * confirm against the field list. */
397 typedef struct x_and_coeff{
/* Description of one wavelet subband: its quantizer (qlog), line stride,
 * a pointer to x_and_coeff records, a link to a parent subband and the
 * adaptive coder contexts for this band. */
402 typedef struct SubBand{
407 int qlog; ///< log(qscale)/log[2^(1/6)]
411 int stride_line; ///< Stride measured in lines, not pixels.
412 x_and_coeff * x_coeff;
413 struct SubBand *parent;
// 7 + 512 context sets of 32 bytes each; the 32-byte width matches the
// state offsets 0..31 used by put_symbol()/get_symbol().
414 uint8_t state[/*7*2*/ 7 + 512][32];
/* One image plane: a grid of subbands with up to MAX_DECOMPOSITIONS levels
 * and four bands per level. */
417 typedef struct Plane{
420 SubBand band[MAX_DECOMPOSITIONS][4];
/* Codec-wide state: frame buffers (input, current, reference and mconly
 * pictures), range-coder contexts for the header and block data, wavelet
 * decomposition settings with their "last_" counterparts, per-reference
 * motion-vector and score arrays, the DWT work buffer, the planes, a
 * motion-estimation cache and an embedded MpegEncContext. */
423 typedef struct SnowContext{
424 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
426 AVCodecContext *avctx;
430 AVFrame input_picture; ///< new_picture with the internal linesizes
431 AVFrame current_picture;
432 AVFrame last_picture[MAX_REF_FRAMES];
433 AVFrame mconly_picture;
434 // uint8_t q_context[16];
435 uint8_t header_state[32];
436 uint8_t block_state[128 + 32*128];
440 int spatial_decomposition_type;
441 int last_spatial_decomposition_type;
442 int temporal_decomposition_type;
443 int spatial_decomposition_count;
444 int temporal_decomposition_count;
// One array of 2-component int16_t vectors and one score array per reference frame.
447 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
448 uint32_t *ref_scores[MAX_REF_FRAMES];
449 DWTELEM *spatial_dwt_buffer;
453 int spatial_scalability;
463 #define QBIAS_SHIFT 3
467 int last_block_max_depth;
468 Plane plane[MAX_PLANES];
470 #define ME_CACHE_SIZE 1024
471 int me_cache[ME_CACHE_SIZE];
472 int me_cache_generation;
475 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Returns the buffer bound to line_num, calling slice_buffer_load_line() to
 * bind one when the slot is still NULL. Both macro arguments are evaluated
 * more than once, so they must be free of side effects. The commented-out
 * variant below always goes through slice_buffer_load_line(). */
486 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
487 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition appears later in the file. */
489 static void iterative_me(SnowContext *s);
/**
 * Sets up a slice buffer.
 * Stores the geometry (line_count, line_width) and base_buffer, allocates a
 * zero-filled table of line_count line pointers, and pre-allocates
 * max_allocated_lines line buffers of line_width elements each on
 * data_stack. data_stack_top is left pointing at the last stack entry,
 * i.e. every buffer starts out free.
 * NOTE(review): none of the allocation results are checked in the visible
 * statements -- confirm how out-of-memory is meant to be handled.
 */
491 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
495 buf->base_buffer = base_buffer;
496 buf->line_count = line_count;
497 buf->line_width = line_width;
498 buf->data_count = max_allocated_lines;
499 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
500 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
502 for (i = 0; i < max_allocated_lines; i++)
504 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
507 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Binds a free line buffer to the given line.
 * Pops the top entry of data_stack and records it in buf->line[line]; the
 * assert requires that at least one free buffer remains.
 * NOTE(review): the "return buf->line[line]" statement is presumably the
 * early exit for a line that is already bound -- confirm the guarding
 * condition. offset is computed but not used by the visible statements.
 */
510 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
515 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
517 assert(buf->data_stack_top >= 0);
518 // assert(!buf->line[line]);
520 return buf->line[line];
522 offset = buf->line_width * line;
523 buffer = buf->data_stack[buf->data_stack_top];
524 buf->data_stack_top--;
525 buf->line[line] = buffer;
527 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Returns the buffer bound to a line to the free stack.
 * Asserts that line is within [0, line_count) and currently bound, pushes
 * its buffer back onto data_stack and clears buf->line[line] to NULL.
 * offset is computed but not used by the visible statements.
 */
532 static void slice_buffer_release(slice_buffer * buf, int line)
537 assert(line >= 0 && line < buf->line_count);
538 assert(buf->line[line]);
540 offset = buf->line_width * line;
541 buffer = buf->line[line];
542 buf->data_stack_top++;
543 buf->data_stack[buf->data_stack_top] = buffer;
544 buf->line[line] = NULL;
546 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walks every line index and releases line buffers back to the free stack.
 * NOTE(review): slice_buffer_release() asserts that the line is bound, so
 * the call is presumably guarded by a check on buf->line[i] -- confirm.
 */
549 static void slice_buffer_flush(slice_buffer * buf)
552 for (i = 0; i < buf->line_count; i++)
556 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
557 slice_buffer_release(buf, i);
/**
 * Tears down a slice buffer.
 * Flushes first so that every line buffer is back on data_stack, then frees
 * the data_count line buffers (last to first), the stack array itself and
 * the line pointer table. av_freep() is handed the address of each pointer.
 */
562 static void slice_buffer_destroy(slice_buffer * buf)
565 slice_buffer_flush(buf);
567 for (i = buf->data_count - 1; i >= 0; i--)
569 assert(buf->data_stack[i]);
570 av_freep(&buf->data_stack[i]);
572 assert(buf->data_stack);
573 av_freep(&buf->data_stack);
575 av_freep(&buf->line);
579 // Avoid a name clash on SGI IRIX
// QEXPSHIFT is derived from FRAC_BITS as (7 - FRAC_BITS + 8).
582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Byte table with QROOT entries.
// NOTE(review): not initialized here, so presumably filled at start-up -- confirm.
583 static uint8_t qexp[QROOT];
/* Loops for as long as v lies outside [0, m]: the unsigned comparison is
 * true both for v > m and for negative v (which converts to a large
 * unsigned value).
 * NOTE(review): by its name the loop body reflects v back into range --
 * confirm against the body. */
585 static inline int mirror(int v, int m){
586 while((unsigned)v > (unsigned)m){
/* Writes the integer v through range coder c, with `state` as the bank of
 * adaptive contexts. Context layout, per the offsets used below:
 *   state[0]       leading flag bit (0 ahead of the magnitude data, 1 on
 *                  the other path -- presumably the v == 0 case)
 *   state[1..10]   exponent e = av_log2(|v|), coded as a run of 1 bits
 *                  closed by a 0 bit
 *   state[11..21]  sign bit (v < 0), presumably only when is_signed
 *   state[22..31]  magnitude bits (a>>i)&1, most significant first
 * NOTE(review): three encodings of the same layout appear back to back;
 * the conditionals selecting between them are not visible here -- confirm
 * which one is built and that it mirrors get_symbol(). */
593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597 const int a= FFABS(v);
598 const int e= av_log2(a);
600 const int el= FFMIN(e, 10);
601 put_rac(c, state+0, 0);
604 put_rac(c, state+1+i, 1); //1..10
607 put_rac(c, state+1+9, 1); //1..10
609 put_rac(c, state+1+FFMIN(i,9), 0);
611 for(i=e-1; i>=el; i--){
612 put_rac(c, state+22+9, (a>>i)&1); //22..31
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
619 put_rac(c, state+11 + el, v < 0); //11..21
622 put_rac(c, state+0, 0);
625 put_rac(c, state+1+i, 1); //1..10
627 put_rac(c, state+1+i, 0);
629 for(i=e-1; i>=0; i--){
630 put_rac(c, state+22+i, (a>>i)&1); //22..31
634 put_rac(c, state+11 + e, v < 0); //11..21
637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
639 put_rac(c, state+1+FFMIN(i,9), 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
650 put_rac(c, state+0, 1);
/* Reads an integer from range coder c; counterpart of put_symbol().
 * Steps visible below: test the flag at state[0]; read the exponent as a
 * run of set bits from contexts state[1 + min(e,9)]; rebuild the magnitude
 * one bit at a time (a = 2*a + bit) from contexts state[22 + min(i,9)];
 * and, when is_signed, read the sign from state[11 + min(e,10)].
 * NOTE(review): a set flag at state[0] presumably means the value is zero,
 * and a set sign bit presumably negates the result -- confirm against the
 * return statements. */
654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655 if(get_rac(c, state+0))
660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
665 for(i=e-1; i>=0; i--){
666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/* Writes v with a code whose step size starts at r = 1<<log2 (or 1 when
 * log2 is negative). Escape bits go to context state[4 + log2]: 1 bits
 * while stepping, then a closing 0 bit; the low log2 bits of v follow,
 * most significant first, in contexts state[31 - i].
 * NOTE(review): the stepping loop that updates v, r and log2 is not
 * visible here -- confirm it matches get_symbol2(). */
676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
678 int r= log2>=0 ? 1<<log2 : 1;
684 put_rac(c, state+4+log2, 1);
689 put_rac(c, state+4+log2, 0);
691 for(i=log2-1; i>=0; i--){
692 put_rac(c, state+31-i, (v>>i)&1);
/* Reads a value written by put_symbol2(): consumes escape bits from context
 * state[4 + log2] while they are set, then adds the low log2 bits read from
 * contexts state[31 - i], each shifted into position i.
 * NOTE(review): the loop body that accumulates into v and updates r/log2
 * is not visible here -- confirm it mirrors put_symbol2(). */
696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
698 int r= log2>=0 ? 1<<log2 : 1;
703 while(get_rac(c, state+4+log2)){
709 for(i=log2-1; i>=0; i--){
710 v+= get_rac(c, state+31-i)<<i;
/* One lifting step over a strided 1-D signal:
 *   dst[i] = src[i] +/- ((mul*(ref[i] + ref[i+1]) + add) >> shift)
 * The term is added on the forward pass and subtracted when inverse is
 * set (see LIFT). At a mirrored edge the single available neighbour is
 * counted twice (mul*2*ref[...]). mirror_left/mirror_right say which edges
 * are mirrored for the given highpass flag and width parity; w is the index
 * of the last output sample.
 * NOTE(review): the conditions guarding the edge statements and the pointer
 * adjustments between them are not visible here. */
716 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
717 const int mirror_left= !highpass;
718 const int mirror_right= (width&1) ^ highpass;
719 const int w= (width>>1) - 1 + (highpass & width);
// LIFT: src plus the prediction term, or minus it when inv is non-zero.
722 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
730 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
734 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/* Lifting step with the same edge handling as lift(), but the prediction
 * term r starts from 3*(ref[i] + ref[i+1]) (3*2*ref[w] at a mirrored edge)
 * before (r + add) >> shift is applied through LIFT.
 * NOTE(review): r is adjusted by statements that are not visible here
 * before it is used -- confirm the exact filter. */
739 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
740 const int mirror_left= !highpass;
741 const int mirror_right= (width&1) ^ highpass;
742 const int w= (width>>1) - 1 + (highpass & width);
749 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
758 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
762 int r= 3*2*ref[w*ref_step];
765 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/* Lifting step in which the update also depends on src itself (see LIFTS).
 * Inverse: src - ((ref - 4*src) >> shift).
 * Forward: (16*4*src + 4*ref + 8 + (5<<27)) / (5*16) - (1<<23); the 5<<27
 * bias equals (1<<23) * 80, so it cancels after the division and only
 * serves to keep the dividend non-negative, making the truncating division
 * round like a floor for inputs in range.
 * Edge handling follows lift(): a mirrored edge counts its single neighbour
 * twice. The LIFTS macro reads `shift` from the enclosing function. */
771 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
772 const int mirror_left= !highpass;
773 const int mirror_right= (width&1) ^ highpass;
774 const int w= (width>>1) - 1 + (highpass & width);
778 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
780 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
786 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* Generic in-place lifting step on one row. Every second sample starting at
 * `start` is updated from an n-tap filter over its neighbours of opposite
 * parity: tap i reads position x + 2*i - n + 1. Positions at or beyond the
 * right edge are reflected (2*width - x2 - 2). The weighted sum is built
 * in 64-bit arithmetic, rounded by adding half of 1<<shift, shifted down,
 * and added to dst[x] (subtracted when inverse is set).
 * NOTE(review): the handling of positions left of the row (x2 < 0) is not
 * visible here -- confirm. */
796 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
799 for(x=start; x<width; x+=2){
803 int x2= x + 2*i - n + 1;
805 else if(x2>=width) x2= 2*width-x2-2;
806 sum += coeffs[i]*(int64_t)dst[x2];
808 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
809 else dst[x] += (sum + (1<<shift)/2)>>shift;
/* Vertical counterpart of inplace_lift(): for every second row starting at
 * `start`, each column x is updated from an n-tap filter over rows
 * y + 2*i - n + 1, with rows at or beyond the bottom edge reflected
 * (2*height - y2 - 2). Samples are addressed as dst[x + row*stride]. The
 * sum is built in 64-bit arithmetic, rounded, shifted and then added
 * (or subtracted when inverse is set).
 * NOTE(review): the handling of rows above the top (y2 < 0) is not
 * visible here -- confirm. */
813 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
815 for(y=start; y<height; y+=2){
816 for(x=0; x<width; x++){
820 int y2= y + 2*i - n + 1;
822 else if(y2>=height) y2= 2*height-y2-2;
823 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
825 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
826 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
/* Candidate lifting-coefficient sets for the generic "X" wavelet routines
 * below. Each COEFFSn expands to a compound-literal int array that is
 * handed to inplace_lift() / inplace_liftV(). The first set is under
 * "#if 0" and therefore disabled.
 * NOTE(review): the directives separating the remaining sets, and the
 * matching Nn, SHIFTn, LXn and SCALEX definitions, are not visible here --
 * confirm which set is active. */
835 #if 0 // more accurate 9/7
838 #define COEFFS1 (int[]){-25987,-25987}
841 #define COEFFS2 (int[]){-27777,-27777}
844 #define COEFFS3 (int[]){28931,28931}
847 #define COEFFS4 (int[]){14533,14533}
851 #define COEFFS1 (int[]){1,-9,-9,1}
854 #define COEFFS2 (int[]){-1,5,5,-1}
867 #define COEFFS1 (int[]){1,1}
870 #define COEFFS2 (int[]){-1,-1}
883 #define COEFFS2 (int[]){-1,-1}
886 #define COEFFS3 (int[]){-1,-1}
889 #define COEFFS4 (int[]){-5,29,29,-5}
894 #define COEFFS1 (int[]){-203,-203}
897 #define COEFFS2 (int[]){-217,-217}
900 #define COEFFS3 (int[]){113,113}
903 #define COEFFS4 (int[]){227,227}
911 #define COEFFS2 (int[]){-1,-1}
914 #define COEFFS3 (int[]){-1,-1}
917 #define COEFFS4 (int[]){3,3}
921 #define COEFFS1 (int[]){1,-9,-9,1}
924 #define COEFFS2 (int[]){1,1}
934 #define COEFFS1 (int[]){1,-9,-9,1}
937 #define COEFFS2 (int[]){-1,5,5,-1}
/* Forward transform of one row of `width` samples: applies the four lifting
 * steps in place (COEFFS1..COEFFS4, alternating start offsets LX1/LX0), then
 * de-interleaves through temp so that the odd-indexed samples land in the
 * second half, starting at w2 = (width+1)/2, and copies temp back over b.
 * NOTE(review): the even-indexed samples presumably fill the first half of
 * temp; that statement is not visible here.
 * NOTE(review): the copy size uses sizeof(int) while the elements are
 * DWTELEM; this is only right if DWTELEM is int-sized -- confirm. */
945 static void horizontal_decomposeX(DWTELEM *b, int width){
947 const int width2= width>>1;
948 const int w2= (width+1)>>1;
951 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
952 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
953 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
954 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
956 for(x=0; x<width2; x++){
958 temp[x+w2]= b[2*x + 1];
962 memcpy(b, temp, width*sizeof(int));
// Inverse of horizontal_decomposeX() for one row: the row is copied to
// temp[], re-interleaved into b, and the four lifting steps are then applied
// in reverse order with the inverse flag set.
965 static void horizontal_composeX(DWTELEM *b, int width){
967 const int width2= width>>1;
969 const int w2= (width+1)>>1;
// NOTE(review): the copy size uses sizeof(int) although b holds DWTELEM; assumes both have the same size - confirm
971 memcpy(temp, b, width*sizeof(int));
972 for(x=0; x<width2; x++){
// samples stored from temp[w2] onwards return to the odd positions
974 b[2*x + 1]= temp[x+w2];
// steps 4..1, i.e. the reverse of the forward order
979 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
980 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
981 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
982 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
// Forward 2-D transform of a width x height plane with the generic lifting
// wavelet: scale every sample by SCALEX, transform each row horizontally,
// then run the four lifting steps vertically over the whole plane.
985 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
988 for(y=0; y<height; y++){
989 for(x=0; x<width; x++){
990 buffer[y*stride + x] *= SCALEX;
994 for(y=0; y<height; y++){
995 horizontal_decomposeX(buffer + y*stride, width);
998 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
999 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
// Inverse of spatial_decomposeX(): vertical lifting steps in reverse order
// with the inverse flag, then the horizontal inverse on every row, then a
// division of every sample by SCALEX.
1004 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1007 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1008 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1012 for(y=0; y<height; y++){
1013 horizontal_composeX(buffer + y*stride, width);
1016 for(y=0; y<height; y++){
1017 for(x=0; x<width; x++){
// integer division undoes the SCALEX multiplication of the forward transform
1018 buffer[y*stride + x] /= SCALEX;
// Forward horizontal 5/3 transform of one row of `width` samples.
// temp[] is a variable-length scratch array with one element per sample.
1023 static void horizontal_decompose53i(DWTELEM *b, int width){
1024 DWTELEM temp[width];
1025 const int width2= width>>1;
// w2 rounds up, so it is one larger than width2 when width is odd
1027 const int w2= (width+1)>>1;
1029 for(x=0; x<width2; x++){
// odd-indexed samples are gathered into the part of temp starting at w2
1031 temp[x+w2]= b[2*x + 1];
1045 for(x=1; x+1<width2; x+=2){
1049 A2 += (A1 + A3 + 2)>>2;
1053 A1= temp[x+1+width2];
1056 A4 += (A1 + A3 + 2)>>2;
1062 A2 += (A1 + A3 + 2)>>2;
// NOTE(review): lift() arguments are presumably (dst, src, ref, dst_step, src_step, ref_step, width, mul, add, shift, highpass, inverse) - confirm against its definition
1067 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1068 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
// Vertical 5/3 analysis step H0 on three rows of `width` samples:
// (b0 + b2) >> 1 is subtracted from each b1 sample. b0 and b2 are only read.
1072 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] -= (b0[i] + b2[i])>>1;
// Vertical 5/3 analysis step L0 on three rows of `width` samples:
// (b0 + b2 + 2) >> 2 is added to each b1 sample; the +2 rounds the shift.
1080 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1083 for(i=0; i<width; i++){
1084 b1[i] += (b0[i] + b2[i] + 2)>>2;
// Forward 2-D 5/3 transform of a width x height plane, two rows per loop
// iteration: newly reached rows are transformed horizontally, then the
// vertical H0/L0 steps run on triples of neighbouring rows.
1088 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
// the row window starts before the first real row; mirror() maps those
// row numbers into the buffer (presumably by reflection - confirm)
1090 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1091 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1093 for(y=-2; y<height; y+=2){
1094 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// the unsigned comparison rejects negative row numbers as well as rows >= height
1098 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1099 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1100 STOP_TIMER("horizontal_decompose53i")}
// each guard tests the row that the step modifies (its middle argument)
1103 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1104 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1105 STOP_TIMER("vertical_decompose53i*")}
// Forward horizontal 9/7 transform of one row: four lifting passes using the
// W_A*, W_B*, W_C* and W_D* constants, alternating between the row b and the
// scratch array temp[]. The last two passes write both halves back into b.
1112 static void horizontal_decompose97i(DWTELEM *b, int width){
1113 DWTELEM temp[width];
// w2 rounds up; the second half of temp/b starts at this offset
1114 const int w2= (width+1)>>1;
// NOTE(review): arguments are presumably (dst, src, ref, dst_step, src_step, ref_step, width, mul, add, shift, highpass, inverse) - confirm against lift()
1116 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1117 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1118 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1119 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
// Vertical 9/7 analysis step H0: subtracts (W_AM*(b0+b2) + W_AO) >> W_AS
// from each b1 sample. b0 and b2 are only read.
1123 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126 for(i=0; i<width; i++){
1127 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
// Vertical 9/7 analysis step H1: adds a W_C*-weighted, rounded and shifted
// term built from b0+b2 to each b1 sample.
// NOTE(review): two alternative forms of the update follow (one multiplying by W_CM, one starting from 3*(b0+b2)); verify which one the build selects.
1131 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1134 for(i=0; i<width; i++){
1136 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1138 int r= 3*(b0[i] + b2[i]);
1141 b1[i] += (r+W_CO)>>W_CS;
// Vertical 9/7 analysis step L0 on each b1 sample, using b0 and b2 as read-only neighbours.
// NOTE(review): two alternative forms follow (a W_B* multiply-shift and a division-based expression); verify which one the build selects.
1146 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1149 for(i=0; i<width; i++){
1151 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
// (5<<27) is added before dividing by 5*16 and (1<<23) subtracted afterwards; the two cancel exactly
1153 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
// Vertical 9/7 analysis step L1: adds (W_DM*(b0+b2) + W_DO) >> W_DS to each
// b1 sample. b0 and b2 are only read.
1158 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1161 for(i=0; i<width; i++){
1162 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
// Forward 2-D 9/7 transform of a width x height plane, two rows per loop
// iteration, working on a sliding window of six rows (b0..b5).
1166 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
// the window starts before the first real row; mirror() maps those row
// numbers into the buffer (presumably by reflection - confirm)
1168 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1169 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1170 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1171 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1173 for(y=-4; y<height; y+=2){
1174 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1175 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// the unsigned comparison rejects negative row numbers as well as rows >= height
1178 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1179 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1181 STOP_TIMER("horizontal_decompose97i")
// four vertical steps; each guard tests the row that the step modifies (its middle argument)
1185 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1186 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1187 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1188 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1191 STOP_TIMER("vertical_decompose97i")
// Multi-level forward wavelet transform of `buffer`.
// For each level from 0 to decomposition_count-1 the transform selected by
// `type` runs on a plane of (width>>level) x (height>>level) samples, with
// the row stride doubled per level (stride<<level).
1201 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1204 for(level=0; level<decomposition_count; level++){
1206 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1207 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
// Inverse horizontal 5/3 transform of one row of `width` samples.
// The lifting passes run with the inverse flag into temp[], after which the
// two halves are interleaved back into b.
1213 static void horizontal_compose53i(DWTELEM *b, int width){
1214 DWTELEM temp[width];
1215 const int width2= width>>1;
// w2 rounds up, so it is one larger than width2 when width is odd
1216 const int w2= (width+1)>>1;
1228 for(x=1; x+1<width2; x+=2){
1232 A2 += (A1 + A3 + 2)>>2;
1236 A1= temp[x+1+width2];
1239 A4 += (A1 + A3 + 2)>>2;
1245 A2 += (A1 + A3 + 2)>>2;
// same constants as the forward transform, applied in the opposite order with the last argument 1
1249 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1250 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1252 for(x=0; x<width2; x++){
// samples stored from temp[w2] onwards return to the odd positions
1254 b[2*x + 1]= temp[x+w2];
// Vertical 5/3 synthesis step H0: adds (b0 + b2) >> 1 to each b1 sample,
// the exact opposite of vertical_decompose53iH0().
1260 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263 for(i=0; i<width; i++){
1264 b1[i] += (b0[i] + b2[i])>>1;
// Vertical 5/3 synthesis step L0: subtracts (b0 + b2 + 2) >> 2 from each b1
// sample, the exact opposite of vertical_decompose53iL0().
1268 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1271 for(i=0; i<width; i++){
1272 b1[i] -= (b0[i] + b2[i] + 2)>>2;
// Prepares the 5/3 inverse-transform state for slice-buffer operation:
// cs->b0 and cs->b1 are set to the slice-buffer lines that mirror() yields
// for row numbers -2 and -1, scaled by stride_line.
1276 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1277 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1278 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
// Prepares the 5/3 inverse-transform state for a flat buffer: cs->b0 and
// cs->b1 point at the rows that mirror() yields for row numbers -2 and -1.
1282 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1283 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1284 cs->b1 = buffer + mirror(-1 , height-1)*stride;
// One step of the slice-buffered 5/3 inverse transform: runs the vertical
// synthesis steps on the current row window, then the horizontal inverse on
// the rows whose vertical processing is complete.
1288 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
// b0/b1 come from the saved state, b2/b3 are the next two lines
1291 DWTELEM *b0= cs->b0;
1292 DWTELEM *b1= cs->b1;
1293 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1294 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
// the unsigned comparison rejects negative row numbers as well as rows >= height
1297 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1298 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1299 STOP_TIMER("vertical_compose53i*")}
1302 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1303 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1304 STOP_TIMER("horizontal_compose53i")}
// One step of the 5/3 inverse transform on a flat buffer: runs the vertical
// synthesis steps on the current row window, then the horizontal inverse on
// the rows whose vertical processing is complete.
1311 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
// b0/b1 come from the saved state, b2/b3 are the next two rows
1313 DWTELEM *b0= cs->b0;
1314 DWTELEM *b1= cs->b1;
1315 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1316 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// the unsigned comparison rejects negative row numbers as well as rows >= height
1319 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1320 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1321 STOP_TIMER("vertical_compose53i*")}
1324 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1325 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1326 STOP_TIMER("horizontal_compose53i")}
// Full 5/3 inverse transform of one level: initialises the row state and
// calls the per-step function until cs.y has passed `height`.
1333 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1335 spatial_compose53i_init(&cs, buffer, height, stride);
1336 while(cs.y <= height)
1337 spatial_compose53i_dy(&cs, buffer, width, height, stride);
// Inverse horizontal 9/7 transform of one row: the four lifting passes of
// horizontal_decompose97i() in reverse order (W_D*, W_C*, W_B*, W_A*) with
// the last argument set to 1. The final two passes write into b with a
// destination step of 2, one starting at b and one at b+1.
1341 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1342 DWTELEM temp[width];
// w2 rounds up; the second half of temp/b starts at this offset
1343 const int w2= (width+1)>>1;
1345 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1346 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1347 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1348 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
// Vertical 9/7 synthesis step H0: adds (W_AM*(b0+b2) + W_AO) >> W_AS to each
// b1 sample, the opposite of vertical_decompose97iH0().
1351 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354 for(i=0; i<width; i++){
1355 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
// Vertical 9/7 synthesis step H1: subtracts a W_C*-weighted, rounded and
// shifted term built from b0+b2 from each b1 sample.
// NOTE(review): two alternative forms of the update follow (one multiplying by W_CM, one starting from 3*(b0+b2)); verify which one the build selects.
1359 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1362 for(i=0; i<width; i++){
1364 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1366 int r= 3*(b0[i] + b2[i]);
1369 b1[i] -= (r+W_CO)>>W_CS;
// Vertical 9/7 synthesis step L0 on each b1 sample, using b0 and b2 as read-only neighbours.
// NOTE(review): two alternative forms follow; the second also feeds 4*b1 into the shifted term. Verify which one the build selects.
1374 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1377 for(i=0; i<width; i++){
1379 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1381 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
// Vertical 9/7 synthesis step L1: subtracts (W_DM*(b0+b2) + W_DO) >> W_DS
// from each b1 sample, the opposite of vertical_decompose97iL1().
1386 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1389 for(i=0; i<width; i++){
1390 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
// Fused vertical 9/7 synthesis over a window of six rows: per column it
// applies the W_D* step to b4, the W_C* step to b3, the W_B* step to b2 and
// the W_A* step to b1, in that order. b0 and b5 are only read.
// NOTE(review): the W_C* and W_B* steps each appear in two alternative forms; verify which ones the build selects.
1394 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1397 for(i=0; i<width; i++){
1401 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1403 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1405 r= 3*(b2[i] + b4[i]);
1408 b3[i] -= (r+W_CO)>>W_CS;
1411 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1413 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1415 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
// Prepares the 9/7 inverse-transform state for slice-buffer operation:
// cs->b0..b3 are set to the slice-buffer lines that mirror() yields for row
// numbers -4, -3, -2 and -1, scaled by stride_line.
1419 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1420 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1421 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1422 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1423 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
// Prepares the 9/7 inverse-transform state for a flat buffer: cs->b0..b3
// point at the rows that mirror() yields for row numbers -4, -3, -2 and -1.
1427 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1428 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1429 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1430 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1431 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
// One step of the slice-buffered 9/7 inverse transform on a window of six
// lines: vertical synthesis first, then the horizontal inverse (through the
// dsp function pointers) on the lines whose vertical processing is complete.
1435 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
// b0..b3 come from the saved state, b4/b5 are the next two lines
1438 DWTELEM *b0= cs->b0;
1439 DWTELEM *b1= cs->b1;
1440 DWTELEM *b2= cs->b2;
1441 DWTELEM *b3= cs->b3;
1442 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1443 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
// fast path: with y>0 and y+4<height all four steps are in range, so the fused routine is used
1446 if(y>0 && y+4<height){
1447 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
// per-step path: each step is guarded by the row it modifies (its middle argument)
1449 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1450 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1451 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1452 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1455 STOP_TIMER("vertical_compose97i")}}
1458 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1459 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1460 if(width>400 && y+0<(unsigned)height){
1461 STOP_TIMER("horizontal_compose97i")}}
// One step of the 9/7 inverse transform on a flat buffer, using a window of
// six rows: the four guarded vertical synthesis steps, then the horizontal
// inverse on the rows whose vertical processing is complete.
1470 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
// b0..b3 come from the saved state, b4/b5 are the next two rows
1472 DWTELEM *b0= cs->b0;
1473 DWTELEM *b1= cs->b1;
1474 DWTELEM *b2= cs->b2;
1475 DWTELEM *b3= cs->b3;
1476 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1477 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// the unsigned comparison rejects negative row numbers as well as rows >= height
1480 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1481 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1482 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1483 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1485 STOP_TIMER("vertical_compose97i")}}
1488 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1489 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1490 if(width>400 && b0 <= b2){
1491 STOP_TIMER("horizontal_compose97i")}}
// Full 9/7 inverse transform of one level: initialises the row state and
// calls the per-step function until cs.y has passed `height`.
1500 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1502 spatial_compose97i_init(&cs, buffer, height, stride);
1503 while(cs.y <= height)
1504 spatial_compose97i_dy(&cs, buffer, width, height, stride);
// Initialises one dwt_compose_t per decomposition level (cs[level]) for the
// slice-buffered inverse transform, walking from the last level down to 0.
// Each level sees height>>level rows and a line stride of stride_line<<level.
// DWT_X has no buffered implementation; that case only logs an error.
1507 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1509 for(level=decomposition_count-1; level>=0; level--){
1511 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1512 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 /* not slicified yet */
1514 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1515 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
// Initialises one dwt_compose_t per decomposition level (cs[level]) for the
// sliced inverse transform on a flat buffer, from the last level down to 0.
// DWT_X is not sliced: for that type the complete inverse of each level is
// performed right here instead of setting up state.
1520 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1522 for(level=decomposition_count-1; level>=0; level--){
1524 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1525 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 /* not slicified yet */
1527 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
// Advances the inverse transform of every level far enough that output row
// `y` can be produced. Levels are processed from the last one down to 0;
// each level is stepped while its cs[level].y has not passed
// min((y>>level) + support, height>>level).
1532 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
// NOTE(review): type is compared with the literal 1 here but with DWT_53/DWT_97 in the switch; presumably DWT_53 == 1 - confirm
1533 const int support = type==1 ? 3 : 5;
1537 for(level=decomposition_count-1; level>=0; level--){
1538 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1540 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1542 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
// Slice-buffer variant of ff_spatial_idwt_slice(): advances the inverse
// transform of every level far enough that output row `y` can be produced.
// Each level is stepped while its cs[level].y has not passed
// min((y>>level) + support, height>>level).
1550 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
// NOTE(review): type is compared with the literal 1 here but with DWT_53/DWT_97 in the switch; presumably DWT_53 == 1 - confirm
1551 const int support = type==1 ? 3 : 5;
1555 for(level=decomposition_count-1; level>=0; level--){
1556 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1558 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1560 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
// Complete multi-level inverse wavelet transform of `buffer`.
// Two paths are visible: a per-level spatial_composeX() loop from the last
// level down to 0, and a sliced path that initialises per-level state and
// then advances the output four rows at a time.
1568 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1571 for(level=decomposition_count-1; level>=0; level--)
1572 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1574 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1576 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1577 for(y=0; y<height; y+=4)
1578 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
// Encodes the coefficients of subband b (read from src, rows `stride` apart)
// with the range coder s->c. The same neighbourhood is gathered twice: a
// first pass records run lengths in runs[] for positions whose neighbours
// and parent are all zero, a second pass writes the symbols.
// An error is logged when the output buffer has fewer than w*40 bytes left.
1582 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1583 const int w= b->width;
1584 const int h= b->height;
// first pass: l = left, lt = top-left, t = top, rt = top-right neighbour
1596 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1597 v= src[x + y*stride];
1600 t= src[x + (y-1)*stride];
1602 lt= src[x - 1 + (y-1)*stride];
1605 rt= src[x + 1 + (y-1)*stride];
1609 l= src[x - 1 + y*stride];
1611 if(orientation==1) ll= src[y + (x-2)*stride];
1612 else ll= src[x - 2 + y*stride];
// p is the co-located coefficient of the parent band, read only when (px,py) lies inside it
1618 if(px<b->parent->width && py<b->parent->height)
1619 p= parent[px + py*2*stride];
// all context values zero: this position is covered by run-length coding
1621 if(!(/*ll|*/l|lt|t|rt|p)){
1623 runs[run_index++]= run;
1631 max_index= run_index;
1632 runs[run_index++]= run;
1634 run= runs[run_index++];
// the number of recorded runs is written first, then the first run length
1636 put_symbol2(&s->c, b->state[30], max_index, 0);
1637 if(run_index <= max_index)
1638 put_symbol2(&s->c, b->state[1], run, 3);
// bail-out check on the remaining output space
1641 if(s->c.bytestream_end - s->c.bytestream < w*40){
1642 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// second pass: same neighbourhood as above
1647 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1648 v= src[x + y*stride];
1651 t= src[x + (y-1)*stride];
1653 lt= src[x - 1 + (y-1)*stride];
1656 rt= src[x + 1 + (y-1)*stride];
1660 l= src[x - 1 + y*stride];
1662 if(orientation==1) ll= src[y + (x-2)*stride];
1663 else ll= src[x - 2 + y*stride];
1669 if(px<b->parent->width && py<b->parent->height)
1670 p= parent[px + py*2*stride];
// non-zero context: code a zero/non-zero flag with a context taken from the
// log2 of a weighted sum of neighbour magnitudes
1672 if(/*ll|*/l|lt|t|rt|p){
1673 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1675 put_rac(&s->c, &b->state[0][context], !!v);
1678 run= runs[run_index++];
1680 if(run_index <= max_index)
1681 put_symbol2(&s->c, b->state[1], run, 3);
1689 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
// l2/t2 pack magnitude and sign of the left/top neighbour: 2*|x| plus 1 when negative
1690 int l2= 2*FFABS(l) + (l<0);
1691 int t2= 2*FFABS(t) + (t<0);
// magnitude minus one, then the sign bit with a context derived from l2 and t2
1693 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1694 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
// Subband encoder entry point: forwards all arguments to
// encode_subband_c0run() and returns its result. The other coder variants
// named below are commented out.
1702 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1703 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1704 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1705 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1706 // encode_subband_dzr(s, b, src, parent, stride, orientation);
// Decodes the coefficients of subband b from the range coder s->c into the
// sparse list b->x_coeff, whose entries carry a column `x` and a value
// `coeff`. An entry with x == w+1 is written as end marker. The parent
// band's list (if any) is walked alongside to supply the parent context.
1709 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1710 const int w= b->width;
1711 const int h= b->height;
// xc is the write cursor; prev_xc/prev2_xc and parent_xc/prev_parent_xc are
// read cursors (presumably into earlier rows and the parent band - confirm)
1716 x_and_coeff *xc= b->x_coeff;
1717 x_and_coeff *prev_xc= NULL;
1718 x_and_coeff *prev2_xc= xc;
1719 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1720 x_and_coeff *prev_parent_xc= parent_xc;
// number of runs, then the first run length while runs remain
1722 runs= get_symbol2(&s->c, b->state[30], 0);
1723 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1728 int lt=0, t=0, rt=0;
1730 if(y && prev_xc->x == 0){
1742 if(prev_xc->x == x + 1)
// the parent band has half the resolution, hence the x>>1 column
1748 if(x>>1 > parent_xc->x){
1751 if(x>>1 == parent_xc->x){
1752 p= parent_xc->coeff;
// non-zero context: decode a zero/non-zero flag, then the value as
// 2*(magnitude) with the sign bit added in the lowest bit
1755 if(/*ll|*/l|lt|t|rt|p){
1756 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1758 v=get_rac(&s->c, &b->state[0][context]);
1760 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1761 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1768 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
// zero context: value decoded with the context-0 states
1770 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1771 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// limit how far the current run may be skipped in one go
1780 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1781 else max_run= FFMIN(run, w-x-1);
1783 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1789 (xc++)->x= w+1; //end marker
// skip forward to the parent's end marker
1795 while(parent_xc->x != parent->width+1)
1798 prev_parent_xc= parent_xc;
1800 parent_xc= prev_parent_xc;
1805 (xc++)->x= w+1; //end marker
// Dequantises the rows start_y..h-1 of subband b from its sparse coefficient
// list b->x_coeff into the slice buffer sb. The position in the list is
// carried between calls through save_state[0].
1809 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1810 const int w= b->width;
// quantiser: qlog is clipped to 0..QROOT*16, qmul comes from the qexp table
// shifted by qlog>>QSHIFT, qadd is the bias term
1812 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1813 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1814 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1819 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1824 /* If we are on the second or later slice, restore our index. */
1826 new_index = save_state[0];
1829 for(y=start_y; y<h; y++){
// each output line is cleared first; only listed coefficients are written
1832 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1833 memset(line, 0, b->width*sizeof(DWTELEM));
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
// v>>1 is the magnitude and v&1 the sign flag; (t^u) - u with u = 0 or -1
// negates t when the flag is set
1838 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1839 register int u= -(v&1);
1840 line[x] = (t^u) - u;
1842 v = b->x_coeff[new_index].coeff;
1843 x = b->x_coeff[new_index++].x;
1846 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1847 STOP_TIMER("decode_subband")
1850 /* Save our variables for the next slice. */
1851 save_state[0] = new_index;
// Resets all adaptive range-coder states to MID_STATE: the state arrays of
// every subband of the three planes, plus header_state and block_state.
1856 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1857 int plane_index, level, orientation;
1859 for(plane_index=0; plane_index<3; plane_index++){
1860 for(level=0; level<s->spatial_decomposition_count; level++){
// orientation 0 is visited at level 0 only; the other levels start at 1
1861 for(orientation=level ? 1:0; orientation<4; orientation++){
1862 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1866 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1867 memset(s->block_state, MID_STATE, sizeof(s->block_state));
// Allocates the zero-initialised BlockNode array s->block for the frame size
// in s->avctx.
1870 static int alloc_blocks(SnowContext *s){
// negate, shift, negate: rounds the division by (1<<LOG2_MB_SIZE) up
// instead of down (relies on an arithmetic right shift of negative values)
1871 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1872 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
// room for w*h blocks, multiplied by 4 for every level of block_max_depth
1877 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
// Copies range-coder state from s to d while keeping d's own output buffer:
// d's bytestream and bytestream_start pointers are saved on entry and
// written back at the end.
1881 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1882 uint8_t *bytestream= d->bytestream;
1883 uint8_t *bytestream_start= d->bytestream_start;
1885 d->bytestream= bytestream;
1886 d->bytestream_start= bytestream_start;
1889 //near copy & paste from dsputil, FIXME
// Walks a w x w block of pixels starting at pix, rows line_size bytes apart
// (presumably returning the sum of the pixel values - confirm).
1890 static int pix_sum(uint8_t * pix, int line_size, int w)
1895 for (i = 0; i < w; i++) {
1896 for (j = 0; j < w; j++) {
// after a row of w pixels, step to the start of the next row
1900 pix += line_size - w;
1905 //near copy & paste from dsputil, FIXME
// Walks a w x w block of pixels starting at pix, rows line_size bytes apart,
// using the square table sq (presumably returning the sum of squared pixel
// values - confirm).
1906 static int pix_norm1(uint8_t * pix, int line_size, int w)
// sq points 256 entries into ff_squareTbl
1909 uint32_t *sq = ff_squareTbl + 256;
1912 for (i = 0; i < w; i++) {
1913 for (j = 0; j < w; j ++) {
// after a row of w pixels, step to the start of the next row
1917 pix += line_size - w;
// Stores one block description into every finest-level entry of s->block
// covered by the block at (x, y) of tree level `level`.
1922 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
// w is the row length of s->block in finest-level blocks
1923 const int w= s->b_width << s->block_max_depth;
1924 const int rem_depth= s->block_max_depth - level;
// (x, y) is in units of this level; the shift converts to finest-level units
1925 const int index= (x + y*w) << rem_depth;
// the block covers block_w x block_w finest-level entries
1926 const int block_w= 1<<rem_depth;
1939 for(j=0; j<block_w; j++){
1940 for(i=0; i<block_w; i++){
1941 s->block[index + i + j*w]= block;
// Points the motion-estimation context at the source and reference planes
// for the block at pixel position (x, y): c->src[0][i] receives src[i]
// unchanged, c->ref[0][i] receives ref[i] advanced by a per-plane offset.
1946 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1947 const int offset[3]= {
// both visible offsets use uvstride and are halved
1949 ((y*c->uvstride + x)>>1),
1950 ((y*c->uvstride + x)>>1),
1954 c->src[0][i]= src [i];
1955 c->ref[0][i]= ref [i] + offset[i];
// Predicts a motion vector (*mx, *my) for reference `ref` from the left, top
// and top-right neighbour blocks by applying mid_pred() per component.
// With a single reference frame the neighbour vectors are used as they are;
// otherwise each one is first multiplied by scale_mv_ref[ref][neighbour ref]
// and reduced with a rounded >>8.
1960 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1961 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1962 if(s->ref_frames == 1){
1963 *mx = mid_pred(left->mx, top->mx, tr->mx);
1964 *my = mid_pred(left->my, top->my, tr->my);
1966 const int *scale = scale_mv_ref[ref];
// +128 rounds the >>8
1967 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1968 (top ->mx * scale[top ->ref] + 128) >>8,
1969 (tr ->mx * scale[tr ->ref] + 128) >>8);
1970 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1971 (top ->my * scale[top ->ref] + 128) >>8,
1972 (tr ->my * scale[tr ->ref] + 128) >>8);
// Named slots of the predictor array P that encode_q_branch() fills and
// hands to the motion search.
1979 #define P_TOPRIGHT P[3]
1980 #define P_MEDIAN P[4]
1982 #define FLAG_QPEL 1 //must be 1
// Encodes the block at (x, y) of quadtree level `level`, choosing between an
// inter block, an intra block and a split into four children.
// The inter and intra variants are trial-encoded into the scratch buffers
// p_buffer/i_buffer with private copies of the block states
// (p_state/i_state); the chosen variant is then copied into the real
// bitstream and its states are committed to s->block_state.
1984 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1985 uint8_t p_buffer[1024];
1986 uint8_t i_buffer[1024];
1987 uint8_t p_state[sizeof(s->block_state)];
1988 uint8_t i_state[sizeof(s->block_state)];
// position of the real bitstream before any trial encoding
1990 uint8_t *pbbak= s->c.bytestream;
1991 uint8_t *pbbak_start= s->c.bytestream_start;
1992 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
// w/h: size of s->block in finest-level blocks
1993 const int w= s->b_width << s->block_max_depth;
1994 const int h= s->b_height << s->block_max_depth;
1995 const int rem_depth= s->block_max_depth - level;
1996 const int index= (x + y*w) << rem_depth;
// pixel size of a block at this level
1997 const int block_w= 1<<(LOG2_MB_SIZE - level);
1998 int trx= (x+1)<<rem_depth;
1999 int try= (y+1)<<rem_depth;
// neighbours outside the picture fall back to null_block; tl falls back to
// left and tr falls back to tl
2000 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2001 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2002 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2003 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2004 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2005 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colour prediction is taken from the left neighbour
2006 int pl = left->color[0];
2007 int pcb= left->color[1];
2008 int pcr= left->color[2];
2012 const int stride= s->current_picture.linesize[0];
2013 const int uvstride= s->current_picture.linesize[1];
// source pixels of this block in the three planes; the two chroma pointers use half the offset
2014 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2015 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2016 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2018 int16_t last_mv[3][2];
2019 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2020 const int shift= 1+qpel;
2021 MotionEstContext *c= &s->m.me;
// coding contexts derived from the left/top neighbours
2022 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2023 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2024 int my_context= av_log2(2*FFABS(left->my - top->my));
2025 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2026 int ref, best_ref, ref_score, ref_mx, ref_my;
2028 assert(sizeof(s->block_state) >= 256);
2030 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2034 // clip predictors / edge ?
// motion vector predictors from the neighbours
2036 P_LEFT[0]= left->mx;
2037 P_LEFT[1]= left->my;
2040 P_TOPRIGHT[0]= tr->mx;
2041 P_TOPRIGHT[1]= tr->my;
// candidate vectors: this block's stored vector and those of the right and bottom blocks
2043 last_mv[0][0]= s->block[index].mx;
2044 last_mv[0][1]= s->block[index].my;
2045 last_mv[1][0]= right->mx;
2046 last_mv[1][1]= right->my;
2047 last_mv[2][0]= bottom->mx;
2048 last_mv[2][1]= bottom->my;
2055 assert(c-> stride == stride);
2056 assert(c->uvstride == uvstride);
2058 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2059 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2060 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2061 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// search window relative to the block position, with a margin of 16-2 around the picture
2063 c->xmin = - x*block_w - 16+2;
2064 c->ymin = - y*block_w - 16+2;
2065 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2066 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
// clip the predictors to the window; the limits are scaled by `shift` before the comparison
2068 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2069 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2070 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2071 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2072 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2073 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2074 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2076 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2077 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2080 c->pred_x= P_LEFT[0];
2081 c->pred_y= P_LEFT[1];
2083 c->pred_x = P_MEDIAN[0];
2084 c->pred_y = P_MEDIAN[1];
// motion search in every reference frame
2089 for(ref=0; ref<s->ref_frames; ref++){
2090 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2092 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2093 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2095 assert(ref_mx >= c->xmin);
2096 assert(ref_mx <= c->xmax);
2097 assert(ref_my >= c->ymin);
2098 assert(ref_my <= c->ymax);
// sub-pel refinement, then the final score for the refined vector
2100 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2101 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// cost term for signalling the reference index
2102 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// per-reference results are stored when the ref_mvs array for this reference exists
2103 if(s->ref_mvs[ref]){
2104 s->ref_mvs[ref][index][0]= ref_mx;
2105 s->ref_mvs[ref][index][1]= ref_my;
2106 s->ref_scores[ref][index]= ref_score;
2108 if(score > ref_score){
2115 //FIXME if mb_cmp != SSE then intra can't be compared currently and mb_penalty vs. lambda2
// bit position of the real coder before the trial encodings
2118 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// trial-encode the inter variant into p_buffer with the private states p_state
2120 pc.bytestream_start=
2121 pc.bytestream= p_buffer; //FIXME end/start? and at the others too
2122 memcpy(p_state, s->block_state, sizeof(s->block_state));
// flag value 1 = no further split (written only when a split would be possible)
2124 if(level!=s->block_max_depth)
2125 put_rac(&pc, &p_state[4 + s_context], 1);
// block type flag: 0 = inter
2126 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2127 if(s->ref_frames > 1)
2128 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
// the vector is coded as difference to the prediction
2129 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2130 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2131 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2132 p_len= pc.bytestream - pc.bytestream_start;
// add the rate term: bits spent, weighted by lambda2
2133 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
// intra candidate: l is the rounded mean of the luma block
2135 block_s= block_w*block_w;
2136 sum = pix_sum(current_data[0], stride, block_w);
2137 l= (sum + block_s/2)/block_s;
// expands to the sum of (pixel - l)^2 when pix_norm1() yields the sum of squares
2138 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// chroma means over the half-size blocks
2140 block_s= block_w*block_w>>2;
2141 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2142 cb= (sum + block_s/2)/block_s;
2143 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2144 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2145 cr= (sum + block_s/2)/block_s;
2146 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// trial-encode the intra variant into i_buffer with the private states i_state
2149 ic.bytestream_start=
2150 ic.bytestream= i_buffer; //FIXME end/start? and at the others too
2151 memcpy(i_state, s->block_state, sizeof(s->block_state));
2152 if(level!=s->block_max_depth)
2153 put_rac(&ic, &i_state[4 + s_context], 1);
// block type flag: 1 = intra, followed by the colour differences to the prediction
2154 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2155 put_symbol(&ic, &i_state[32], l-pl , 1);
2156 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2157 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2158 i_len= ic.bytestream - ic.bytestream_start;
2159 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2161 // assert(score==256*256*256*64-1);
2162 assert(iscore < 255*255*256 + s->lambda2*10);
2163 assert(iscore >= 0);
2164 assert(l>=0 && l<=255);
2165 assert(pl>=0 && pl<=255);
// scene change statistics from the intra (varc) and inter (vard) scores
2168 int varc= iscore >> 8;
2169 int vard= score >> 8;
2170 if (vard <= 64 || vard < varc)
2171 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2173 c->scene_change_score+= s->m.qscale;
// split candidate: write flag value 0 to the real bitstream and encode the four children recursively
2176 if(level!=s->block_max_depth){
2177 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2178 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2179 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2180 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2181 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2182 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
// the split wins only when it beats both the inter and the intra score
2184 if(score2 < score && score2 < iscore)
// intra chosen: copy the trial bytes over the real stream from the saved position and commit the states
2189 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2190 memcpy(pbbak, i_buffer, i_len);
2192 s->c.bytestream_start= pbbak_start;
2193 s->c.bytestream= pbbak + i_len;
2194 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2195 memcpy(s->block_state, i_state, sizeof(s->block_state));
// inter chosen: same procedure with the inter trial data
2198 memcpy(pbbak, p_buffer, p_len);
2200 s->c.bytestream_start= pbbak_start;
2201 s->c.bytestream= pbbak + p_len;
2202 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2203 memcpy(s->block_state, p_state, sizeof(s->block_state));
2208 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2209 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2210 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2212 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Write the block at grid position (x, y) of tree level `level` to the range
 * coder s->c, using the values already stored in s->block[] (type, colour,
 * motion vector, reference), and recurse into the four children when the
 * block is signalled as split.
 *
 * NOTE(review): some lines of this function (braces, local declarations,
 * guards) are absent from this extract; the comments below describe the
 * visible statements only.
 */
2216 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2217 const int w= s->b_width << s->block_max_depth;
2218 const int rem_depth= s->block_max_depth - level;
2219 const int index= (x + y*w) << rem_depth;
2220 int trx= (x+1)<<rem_depth;
2221 BlockNode *b= &s->block[index];
// Neighbours used for context selection and prediction. Positions outside the
// grid fall back to null_block (left/top), to left (tl) or to tl (tr).
2222 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2223 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2224 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2225 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// The colour predictors are the left neighbour's colours.
2226 int pl = left->color[0];
2227 int pcb= left->color[1];
2228 int pcr= left->color[2];
// Context indices; the mv contexts are offset by 16 when b->ref is non-zero.
2230 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2231 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2232 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2233 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this call is not visible in this extract.
2236 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2240 if(level!=s->block_max_depth){
// The four sub-blocks are identical: code flag 1 and treat this block as a leaf.
2241 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2242 put_rac(&s->c, &s->block_state[4 + s_context], 1);
// Otherwise code flag 0 and encode the four children one level deeper.
2244 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2245 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2246 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2247 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2248 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// Intra leaf: type flag 1, then the colours as differences to the predictors.
2252 if(b->type & BLOCK_INTRA){
2253 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2254 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2255 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2256 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2257 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2258 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// Inter leaf: type flag 0, the reference index (only when more than one
// reference frame is in use), then the motion vector as a difference to the
// pred_mv() prediction.
2260 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2261 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2262 if(s->ref_frames > 1)
2263 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2264 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2265 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2266 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Read the block at grid position (x, y) of tree level `level` from the range
 * coder s->c and store it with set_blocks(), or recurse into its four
 * children when the block is not coded as a leaf.
 *
 * NOTE(review): some lines of this function (braces, local declarations,
 * guards) are absent from this extract; the comments below describe the
 * visible statements only.
 */
2270 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2271 const int w= s->b_width << s->block_max_depth;
2272 const int rem_depth= s->block_max_depth - level;
2273 const int index= (x + y*w) << rem_depth;
2274 int trx= (x+1)<<rem_depth;
// Neighbours used for context selection and prediction. Positions outside the
// grid fall back to null_block (left/top), to left (tl) or to tl (tr).
2275 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2276 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2277 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2278 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2279 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// Fills the block with null_block's values, marked intra.
// NOTE(review): the condition guarding this call is not visible in this extract.
2282 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
// Leaf case: the deepest level is always a leaf; above it a coded flag decides.
2286 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// The colours start from the left neighbour's values.
2288 int l = left->color[0];
2289 int cb= left->color[1];
2290 int cr= left->color[2];
2292 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
// The tr terms are multiplied by 0 and therefore do not affect the contexts.
2293 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2294 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
// A set flag selects an intra block, a cleared flag an inter block.
2296 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Intra: colour differences are added to the predictors.
2299 pred_mv(s, &mx, &my, 0, left, top, tr);
2300 l += get_symbol(&s->c, &s->block_state[32], 1);
2301 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2302 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// Inter: the reference index is read only when more than one reference frame
// is in use; the mv differences are added to the pred_mv() prediction, with
// the contexts offset by 16 when ref is non-zero.
2304 if(s->ref_frames > 1)
2305 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2306 pred_mv(s, &mx, &my, ref, left, top, tr);
2307 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2308 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2310 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// Split case: decode the four children one level deeper.
2312 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2313 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2315 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encode the block tree of the frame by calling encode_q_branch2() or
 * encode_q_branch() at level 0 for block positions (x, y).
 *
 * NOTE(review): the local declarations, the loops over x/y and the statements
 * controlled by the first two `if`s are absent from this extract.
 */
2319 static void encode_blocks(SnowContext *s, int search){
// Applies only to non-keyframes when ME_ITER is selected and `search` is set.
2324 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
// Reports an error when fewer than w*MB_SIZE*MB_SIZE*3 bytes remain in the
// output buffer.
2328 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2329 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// encode_q_branch2() is used with ME_ITER or when no search is requested.
// NOTE(review): encode_q_branch() is presumably the else branch - confirm.
2333 if(s->avctx->me_method == ME_ITER || !search)
2334 encode_q_branch2(s, 0, x, y);
2336 encode_q_branch (s, 0, x, y);
/**
 * Decode the block tree by calling decode_q_branch() at level 0 for block
 * positions (x, y).
 * NOTE(review): the loops that produce x and y are absent from this extract.
 */
2341 static void decode_blocks(SnowContext *s){
2348 decode_q_branch(s, 0, x, y);
/**
 * Interpolate a b_w x b_h block from src into dst using the offsets dx and dy.
 *
 * The first loop covers b_h+5 rows and blends a 6-tap filtered value
 * (taps 1, -5, 20, 20, -5, 1) with a neighbouring sample using weights derived
 * from dx. After tmp is rewound, the second loop covers b_h rows, reads six
 * consecutive rows of tmp and does the same with weights derived from dy.
 *
 * NOTE(review): the sample loads of the first loop and the stores and pointer
 * advances of both loops are absent from this extract.
 */
2353 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2356 for(y=0; y < b_h+5; y++){
2357 for(x=0; x < b_w; x++){
2364 // int am= 9*(a1+a2) - (a0+a3);
2365 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2366 // int am= 18*(a2+a3) - 2*(a1+a4);
2367 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2368 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2370 // if(b_w==16) am= 8*(a1+a2);
// dx below 8 blends a2 (scaled by 32) with the filtered value, dx of 8 or more
// blends the filtered value with a3; the weights sum to 8 and the result is
// rounded and shifted down by 8 bits.
2372 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2373 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2375 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Saturate: negative values become 0, values above 255 become all ones.
2376 if(am&(~255)) am= ~(am>>31);
2380 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2381 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2382 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2383 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Move tmp back by the b_h+5 rows covered by the first loop.
2388 tmp -= (b_h+5)*stride;
2390 for(y=0; y < b_h; y++){
2391 for(x=0; x < b_w; x++){
// Six vertically adjacent samples of the intermediate buffer.
2392 int a0= tmp[x + 0*stride];
2393 int a1= tmp[x + 1*stride];
2394 int a2= tmp[x + 2*stride];
2395 int a3= tmp[x + 3*stride];
2396 int a4= tmp[x + 4*stride];
2397 int a5= tmp[x + 5*stride];
2398 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2399 // int am= 18*(a2+a3) - 2*(a1+a4);
2400 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2401 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2403 // if(b_w==16) am= 8*(a1+a2);
// Same blend as in the first loop, with dy as the weight.
2405 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2406 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// Saturate as in the first loop.
2408 if(am&(~255)) am= ~(am>>31);
2411 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2412 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2413 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2419 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(), a wrapper that calls
 * mc_block() for a b_w x b_w block with the source pointer moved 2 pixels left
 * and 2 rows up, using a local scratch array of stride*(b_w+5) bytes.
 * NOTE(review): the remainder of the macro is absent from this extract.
 */
2422 #define mca(dx,dy,b_w)\
2423 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2424 uint8_t tmp[stride*(b_w+5)];\
2426 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction of one block to dst.
 *
 * Intra blocks are filled with block->color[plane_index]. Other blocks are
 * motion compensated from s->last_picture[block->ref] using the block's
 * motion vector.
 *
 * NOTE(review): the width tests that select between the unrolled fill loops,
 * the integer part of the motion vector offset and several braces are absent
 * from this extract.
 */
2438 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2439 if(block->type & BLOCK_INTRA){
2441 const int color = block->color[plane_index];
// The colour replicated into every byte of a 32-bit word.
2442 const int color4= color*0x01010101;
// Eight 32-bit stores per row (32 bytes).
2444 for(y=0; y < b_h; y++){
2445 *(uint32_t*)&dst[0 + y*stride]= color4;
2446 *(uint32_t*)&dst[4 + y*stride]= color4;
2447 *(uint32_t*)&dst[8 + y*stride]= color4;
2448 *(uint32_t*)&dst[12+ y*stride]= color4;
2449 *(uint32_t*)&dst[16+ y*stride]= color4;
2450 *(uint32_t*)&dst[20+ y*stride]= color4;
2451 *(uint32_t*)&dst[24+ y*stride]= color4;
2452 *(uint32_t*)&dst[28+ y*stride]= color4;
// Four 32-bit stores per row (16 bytes).
2455 for(y=0; y < b_h; y++){
2456 *(uint32_t*)&dst[0 + y*stride]= color4;
2457 *(uint32_t*)&dst[4 + y*stride]= color4;
2458 *(uint32_t*)&dst[8 + y*stride]= color4;
2459 *(uint32_t*)&dst[12+ y*stride]= color4;
// Two 32-bit stores per row (8 bytes).
2462 for(y=0; y < b_h; y++){
2463 *(uint32_t*)&dst[0 + y*stride]= color4;
2464 *(uint32_t*)&dst[4 + y*stride]= color4;
// One 32-bit store per row (4 bytes).
2467 for(y=0; y < b_h; y++){
2468 *(uint32_t*)&dst[0 + y*stride]= color4;
// Byte-wise fill for any b_w.
2471 for(y=0; y < b_h; y++){
2472 for(x=0; x < b_w; x++){
2473 dst[x + y*stride]= color;
2478 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// Plane 0 uses twice the motion vector scale of the other planes.
2479 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2480 int mx= block->mx*scale;
2481 int my= block->my*scale;
// The low four bits of the scaled vector are passed on as dx/dy.
2482 const int dx= mx&15;
2483 const int dy= my&15;
2484 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2487 src += sx + sy*stride;
// When the source area comes within 4 pixels of the right/bottom border, or
// sx/sy is negative (the unsigned cast makes it compare as a large value),
// ff_emulated_edge_mc() is called with tmp + MB_SIZE as its destination.
2488 if( (unsigned)sx >= w - b_w - 4
2489 || (unsigned)sy >= h - b_h - 4){
2490 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2493 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2494 // assert(!(b_w&(b_w-1)));
2495 assert(b_w>1 && b_h>1);
2496 assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles offsets that are not a multiple of 4, block shapes other
// than 1:1, 1:2 or 2:1, and widths that are not a power of two; the remaining
// cases use the h264 qpel functions of s->dsp.
2497 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2498 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// Two calls side by side (16 pixels apart) for every 16 rows.
2501 for(y=0; y<b_h; y+=16){
2502 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2503 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2506 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// Block twice as wide as high: two calls, the second b_h pixels to the right.
2507 else if(b_w==2*b_h){
2508 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// Two calls, the second b_w rows further down.
2512 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Combine the four predictions in block[0..3] with the OBMC weights and apply
 * the result to the slice buffer lines src_y .. src_y+b_h-1, starting at
 * column src_x.
 *
 * NOTE(review): the local declarations and the test that selects between the
 * "add and store to dst8" path and the "subtract from dst" path are absent
 * from this extract (presumably the `add` argument - confirm).
 */
2518 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2519 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2522 for(y=0; y<b_h; y++){
2523 //FIXME ugly misuse of obmc_stride
// Four weight rows, one per quadrant of the obmc table: obmc2 is half a row to
// the right of obmc1, obmc3/obmc4 are obmc_stride/2 rows below obmc1/obmc2.
2524 const uint8_t *obmc1= obmc + y*obmc_stride;
2525 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2526 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2527 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2528 dst = slice_buffer_get_line(sb, src_y + y);
2529 for(x=0; x<b_w; x++){
// Weighted sum of the four predictions; note the reversed pairing
// (obmc1 with block[3] ... obmc4 with block[0]).
2530 int v= obmc1[x] * block[3][x + y*src_stride]
2531 +obmc2[x] * block[2][x + y*src_stride]
2532 +obmc3[x] * block[1][x + y*src_stride]
2533 +obmc4[x] * block[0][x + y*src_stride];
2535 v <<= 8 - LOG2_OBMC_MAX;
2537 v += 1<<(7 - FRAC_BITS);
2538 v >>= 8 - FRAC_BITS;
// Add the buffer value, round away the FRAC_BITS fraction, saturate to 0..255
// and store the pixel in dst8.
2541 v += dst[x + src_x];
2542 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2543 if(v&(~255)) v= ~(v>>31);
2544 dst8[x + y*src_stride] = v;
// Subtract the prediction from the buffer value.
2546 dst[x + src_x] -= v;
2552 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Compute the predictions of the four blocks meeting at grid position
 * (b_x, b_y) with pred_block() and apply their OBMC-weighted combination to
 * dst (added or subtracted) or, on the slice-buffer path, through
 * s->dsp.inner_add_yblock().
 *
 * NOTE(review): many lines of this function are absent from this extract:
 * the bodies of the border-handling branches, the assignments of block[] and
 * ptmp between the pred_block() calls, and the conditions selecting between
 * the alternative accumulation loops. The comments below describe the visible
 * statements only.
 */
2553 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2554 const int b_width = s->b_width << s->block_max_depth;
2555 const int b_height= s->b_height << s->block_max_depth;
2556 const int b_stride= b_width;
// The four block nodes around the grid position: left/right top, left/right bottom.
2557 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2558 BlockNode *rt= lt+1;
2559 BlockNode *lb= lt+b_stride;
2560 BlockNode *rb= lb+1;
2562 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2563 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Border cases of the block grid (bodies not visible).
2570 }else if(b_x + 1 >= b_width){
2577 }else if(b_y + 1 >= b_height){
// Clipping of the pixel area against the picture (bodies mostly not visible).
2582 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2585 if(!sliced && !offset_dst)
2588 }else if(src_x + b_w > w){
2592 obmc -= src_y*obmc_stride;
2594 if(!sliced && !offset_dst)
2595 dst -= src_y*dst_stride;
2597 }else if(src_y + b_h> h){
// Nothing left to do once the area is empty.
2601 if(b_w<=0 || b_h<=0) return;
2603 assert(src_stride > 2*MB_SIZE + 5);
2604 if(!sliced && offset_dst)
2605 dst += src_x + src_y*dst_stride;
2606 dst8+= src_x + src_y*src_stride;
2607 // src += src_x + src_y*src_stride;
2609 ptmp= tmp + 3*tmp_step;
// One pred_block() call per block node; the same_block() tests compare each
// node with the ones handled before it.
2612 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2614 if(same_block(lt, rt)){
2619 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2622 if(same_block(lt, lb)){
2624 }else if(same_block(rt, lb)){
2629 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2632 if(same_block(lt, rb) ){
2634 }else if(same_block(rt, rb)){
2636 }else if(same_block(lb, rb)){
2640 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Four separate passes, each weighting one prediction with one quadrant of the
// obmc table and adding it to or subtracting it from dst.
2643 for(y=0; y<b_h; y++){
2644 for(x=0; x<b_w; x++){
2645 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2650 for(y=0; y<b_h; y++){
2651 uint8_t *obmc2= obmc + (obmc_stride>>1);
2652 for(x=0; x<b_w; x++){
2653 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2654 if(add) dst[x + y*dst_stride] += v;
2655 else dst[x + y*dst_stride] -= v;
2658 for(y=0; y<b_h; y++){
2659 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660 for(x=0; x<b_w; x++){
2661 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2662 if(add) dst[x + y*dst_stride] += v;
2663 else dst[x + y*dst_stride] -= v;
2666 for(y=0; y<b_h; y++){
2667 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2668 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2669 for(x=0; x<b_w; x++){
2670 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2671 if(add) dst[x + y*dst_stride] += v;
2672 else dst[x + y*dst_stride] -= v;
// Slice-buffer path: the blending is delegated to the dsp function.
2679 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2680 STOP_TIMER("inner_add_yblock")
// Single-pass blend addressing dst by dst_stride.
2682 for(y=0; y<b_h; y++){
2683 //FIXME ugly misuse of obmc_stride
2684 const uint8_t *obmc1= obmc + y*obmc_stride;
2685 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2686 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2687 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2688 for(x=0; x<b_w; x++){
2689 int v= obmc1[x] * block[3][x + y*src_stride]
2690 +obmc2[x] * block[2][x + y*src_stride]
2691 +obmc3[x] * block[1][x + y*src_stride]
2692 +obmc4[x] * block[0][x + y*src_stride];
2694 v <<= 8 - LOG2_OBMC_MAX;
2696 v += 1<<(7 - FRAC_BITS);
2697 v >>= 8 - FRAC_BITS;
// Add the buffer value, round away the FRAC_BITS fraction, saturate to 0..255
// and store the pixel in dst8.
2700 v += dst[x + y*dst_stride];
2701 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2702 if(v&(~255)) v= ~(v>>31);
2703 dst8[x + y*src_stride] = v;
// Subtract the prediction from the buffer value.
2705 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction for block row mb_y of one plane to the slice buffer sb.
 *
 * For keyframes (or when debug flag 512 is set) no block prediction is run:
 * the visible code either converts the buffered lines to pixels in dst8 with
 * a bias of 128, or subtracts 128<<FRAC_BITS from the buffered lines.
 * Otherwise add_yblock() is called for mb_x = 0 .. mb_w inclusive.
 *
 * NOTE(review): the declarations of w/h/x/y, the test choosing between the
 * two keyframe loops, the shift after the bias and the early return are
 * absent from this extract.
 */
2712 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2713 Plane *p= &s->plane[plane_index];
2714 const int mb_w= s->b_width << s->block_max_depth;
2715 const int mb_h= s->b_height << s->block_max_depth;
2717 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2718 int block_w = plane_index ? block_size/2 : block_size;
2719 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2720 int obmc_stride= plane_index ? block_size : 2*block_size;
2721 int ref_stride= s->current_picture.linesize[plane_index];
2722 uint8_t *dst8= s->current_picture.data[plane_index];
2727 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height.
2732 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2734 // DWTELEM * line = slice_buffer_get_line(sb, y);
2735 DWTELEM * line = sb->line[y];
2738 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Bias by 128 plus a rounding term, saturate and store the pixel.
2739 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2741 if(v&(~255)) v= ~(v>>31);
2742 dst8[x + y*ref_stride]= v;
2746 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2748 // DWTELEM * line = slice_buffer_get_line(sb, y);
2749 DWTELEM * line = sb->line[y];
// Remove the bias of 128 from the buffered coefficients.
2752 line[x] -= 128 << FRAC_BITS;
2753 // buf[x + y*w]-= 128<<FRAC_BITS;
// One add_yblock() call per grid column; each area starts half a block before
// the block position.
2761 for(mb_x=0; mb_x<=mb_w; mb_x++){
2764 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2765 block_w*mb_x - block_w/2,
2766 block_w*mb_y - block_w/2,
2769 w, ref_stride, obmc_stride,
2771 add, 0, plane_index);
2773 STOP_TIMER("add_yblock")
2776 STOP_TIMER("predict_slice")
/**
 * Apply the prediction for block row mb_y of one plane to the coefficient
 * buffer buf (row stride w).
 *
 * For keyframes (or when debug flag 512 is set) no block prediction is run:
 * the visible code either converts buf to pixels in dst8 with a bias of 128,
 * or subtracts 128<<FRAC_BITS from buf. Otherwise add_yblock() is called for
 * mb_x = 0 .. mb_w inclusive.
 *
 * NOTE(review): the declarations of w/h/x/y, the test choosing between the
 * two keyframe loops, the shift after the bias and the early return are
 * absent from this extract.
 */
2779 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2780 Plane *p= &s->plane[plane_index];
2781 const int mb_w= s->b_width << s->block_max_depth;
2782 const int mb_h= s->b_height << s->block_max_depth;
2784 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2785 int block_w = plane_index ? block_size/2 : block_size;
2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
2788 int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *dst8= s->current_picture.data[plane_index];
2794 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height.
2799 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Bias by 128 plus a rounding term, saturate and store the pixel.
2801 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803 if(v&(~255)) v= ~(v>>31);
2804 dst8[x + y*ref_stride]= v;
2808 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Remove the bias of 128 from the coefficients.
2810 buf[x + y*w]-= 128<<FRAC_BITS;
// One add_yblock() call per grid column; each area starts half a block before
// the block position.
2818 for(mb_x=0; mb_x<=mb_w; mb_x++){
2821 add_yblock(s, 0, NULL, buf, dst8, obmc,
2822 block_w*mb_x - block_w/2,
2823 block_w*mb_y - block_w/2,
2826 w, ref_stride, obmc_stride,
2828 add, 1, plane_index);
2830 STOP_TIMER("add_yblock")
2833 STOP_TIMER("predict_slice")
2836 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2837 const int mb_h= s->b_height << s->block_max_depth;
2839 for(mb_y=0; mb_y<=mb_h; mb_y++)
2840 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Compute a colour value in 0..255 for block (mb_x, mb_y) of one plane.
 *
 * The block is temporarily made intra with colour 0, the prediction around it
 * is produced with add_yblock() into a scratch buffer, and the returned value
 * is the OBMC-weighted quotient ab/aa of the visible accumulation below
 * (scaled by 1<<LOG2_OBMC_MAX, rounded and clipped).
 *
 * NOTE(review): the declarations of i/x2/y2/ab/aa/d, the loop over i and the
 * restoring of the block from `backup` are absent from this extract.
 */
2843 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2845 Plane *p= &s->plane[plane_index];
2846 const int block_size = MB_SIZE >> s->block_max_depth;
2847 const int block_w = plane_index ? block_size/2 : block_size;
2848 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2849 const int obmc_stride= plane_index ? block_size : 2*block_size;
2850 const int ref_stride= s->current_picture.linesize[plane_index];
2851 uint8_t *src= s-> input_picture.data[plane_index];
// Per-plane scratch area inside s->m.obmc_scratchpad.
2852 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2853 const int b_stride = s->b_width << s->block_max_depth;
2854 const int w= p->width;
2855 const int h= p->height;
2856 int index= mb_x + mb_y*b_stride;
2857 BlockNode *b= &s->block[index];
2858 BlockNode backup= *b;
// Make the block intra with colour 0 and clear the scratch area.
2862 b->type|= BLOCK_INTRA;
2863 b->color[plane_index]= 0;
2864 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// i selects one of the four grid positions around the block:
// bit 0 is the column offset, bit 1 the row offset.
2867 int mb_x2= mb_x + (i &1) - 1;
2868 int mb_y2= mb_y + (i>>1) - 1;
2869 int x= block_w*mb_x2 + block_w/2;
2870 int y= block_w*mb_y2 + block_w/2;
// add is 0, so the prediction is subtracted from the (zeroed) scratch area.
2872 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2873 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Accumulate over the part of this quadrant that lies inside the picture.
2875 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2876 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2877 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2878 int obmc_v= obmc[index];
// When the quadrant extends beyond a picture edge, the weight found block_w
// rows or columns further on in the table is added as well.
2880 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2881 if(x<0) obmc_v += obmc[index + block_w];
2882 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2883 if(x+block_w>w) obmc_v += obmc[index - block_w];
2884 //FIXME precalc this or simplify it somehow else
2886 d = -dst[index] + (1<<(FRAC_BITS-1));
// ab sums weight * (source pixel - rounded prediction), aa sums weight^2.
2888 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2889 aa += obmc_v * obmc_v; //FIXME precalculate this
2895 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/**
 * Return a bit-cost figure for the block at grid position (x, y).
 *
 * Intra blocks: 3 + 2 * sum of av_log2(2*|colour difference to the left
 * neighbour|) over the three components. Other blocks: 2 * (1 + av_log2 terms
 * of the motion vector and the reference index). The FIXME below indicates
 * that this is not an accurate rate.
 *
 * @param w block width in grid units, used to locate the top-right neighbour
 *
 * NOTE(review): the declarations of dmx/dmy, the statement controlled by the
 * range check, the subtraction that turns the prediction into a difference
 * and the delimiters of a block comment are absent from this extract.
 */
2898 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2899 const int b_stride = s->b_width << s->block_max_depth;
2900 const int b_height = s->b_height<< s->block_max_depth;
2901 int index= x + y*b_stride;
// NOTE(review): these pointers are formed before the range check on x/y below.
2902 const BlockNode *b = &s->block[index];
2903 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2904 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2905 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2906 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2908 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2909 // int my_context= av_log2(2*FFABS(left->my - top->my));
// Positions outside the grid are handled separately (statement not visible).
2911 if(x<0 || x>=b_stride || y>=b_height)
2918 00001XXXX 15-30 8-15
2920 //FIXME try accurate rate
2921 //FIXME intra and inter predictors if surrounding blocks aren't the same type
2922 if(b->type & BLOCK_INTRA){
2923 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2924 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2925 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2927 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2930 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2931 + av_log2(2*FFABS(dmy))
2932 + av_log2(2*b->ref));
/**
 * Return distortion + rate*penalty_factor for the block at (mb_x, mb_y).
 *
 * The block's prediction is produced with pred_block() over a window of
 * 2*block_w x 2*block_w pixels, combined with the contents of the scratch
 * buffer using the weights in obmc_edged, and compared with the input picture
 * using the comparison function selected by s->avctx->me_cmp. The rate is
 * summed from get_block_bits().
 *
 * NOTE(review): the declarations of x/y/i/distortion/rate, the store of the
 * blended pixel, the body of the corner special case and the loops around the
 * distortion and rate statements are absent from this extract.
 */
2936 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2937 Plane *p= &s->plane[plane_index];
2938 const int block_size = MB_SIZE >> s->block_max_depth;
2939 const int block_w = plane_index ? block_size/2 : block_size;
2940 const int obmc_stride= plane_index ? block_size : 2*block_size;
2941 const int ref_stride= s->current_picture.linesize[plane_index];
2942 uint8_t *dst= s->current_picture.data[plane_index];
2943 uint8_t *src= s-> input_picture.data[plane_index];
2944 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// Variable-length scratch arrays sized from the picture line size.
2945 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2946 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2947 const int b_stride = s->b_width << s->block_max_depth;
2948 const int b_height = s->b_height<< s->block_max_depth;
2949 const int w= p->width;
2950 const int h= p->height;
2953 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Top-left corner of the window, half a block before the block position.
2954 int sx= block_w*mb_x - block_w/2;
2955 int sy= block_w*mb_y - block_w/2;
// Part of the window that lies inside the picture, in window coordinates.
2956 int x0= FFMAX(0,-sx);
2957 int y0= FFMAX(0,-sy);
2958 int x1= FFMIN(block_w*2, w-sx);
2959 int y1= FFMIN(block_w*2, h-sy);
2962 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2964 for(y=y0; y<y1; y++){
2965 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2966 const DWTELEM *pred1 = pred + y*obmc_stride;
2967 uint8_t *cur1 = cur + y*ref_stride;
2968 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2969 for(x=x0; x<x1; x++){
// Weighted prediction plus the scratch buffer value, scaled down by
// FRAC_BITS and saturated to 0..255.
2970 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2971 v = (v + pred1[x]) >> FRAC_BITS;
2972 if(v&(~255)) v= ~(v>>31);
2977 /* copy the regions where obmc[] = (uint8_t)256 */
// Applies only to blocks in a corner of the grid when LOG2_OBMC_MAX is 8.
2978 if(LOG2_OBMC_MAX == 8
2979 && (mb_x == 0 || mb_x == b_stride-1)
2980 && (mb_y == 0 || mb_y == b_height-1)){
// Copies the prediction rows from cur into the current picture.
2989 for(y=y0; y<y1; y++)
2990 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2994 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2995 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2996 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2997 * so improving the score of one block is not strictly guaranteed to
2998 * improve the score of the whole frame, so iterative motion est
2999 * doesn't always converge. */
// The wavelet comparisons are called directly with a size of 32.
3000 if(s->avctx->me_cmp == FF_CMP_W97)
3001 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3002 else if(s->avctx->me_cmp == FF_CMP_W53)
3003 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// Sum of 16x16 comparisons; i selects the quadrant (bit 0 = x, bit 1 = y).
3007 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3008 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3013 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate of the block at (mb_x + (i&1) - (i>>1), mb_y + (i>>1)).
3022 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3024 if(mb_x == b_stride-2)
3025 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3027 return distortion + rate*penalty_factor;
/**
 * Return distortion + rate*penalty_factor for the group of four blocks whose
 * top-left member is at (mb_x, mb_y).
 *
 * For each of nine grid positions around the group, the prediction is written
 * into the current picture through add_yblock(), pixels outside the picture
 * are copied from the input picture, and the dsp comparison function is
 * accumulated into the distortion. The rate is summed from get_block_bits().
 *
 * NOTE(review): the declarations of i/y2/distortion/rate, the loop over i,
 * the tests around the left/right border copies and the use of `merged`
 * between its definition and the rate loop are absent from this extract.
 */
3030 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3032 Plane *p= &s->plane[plane_index];
3033 const int block_size = MB_SIZE >> s->block_max_depth;
3034 const int block_w = plane_index ? block_size/2 : block_size;
3035 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3036 const int obmc_stride= plane_index ? block_size : 2*block_size;
3037 const int ref_stride= s->current_picture.linesize[plane_index];
3038 uint8_t *dst= s->current_picture.data[plane_index];
3039 uint8_t *src= s-> input_picture.data[plane_index];
// All-zero coefficient buffer passed to add_yblock() with a stride of 0.
3040 static const DWTELEM zero_dst[4096]; //FIXME
3041 const int b_stride = s->b_width << s->block_max_depth;
3042 const int w= p->width;
3043 const int h= p->height;
3046 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i selects a position in a 3x3 arrangement starting one block up and left.
3049 int mb_x2= mb_x + (i%3) - 1;
3050 int mb_y2= mb_y + (i/3) - 1;
3051 int x= block_w*mb_x2 + block_w/2;
3052 int y= block_w*mb_y2 + block_w/2;
3054 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3055 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3057 //FIXME find a cleaner/simpler way to skip the outside stuff
// Rows above and below the picture are copied from the input picture.
3058 for(y2= y; y2<0; y2++)
3059 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3060 for(y2= h; y2<y+block_w; y2++)
3061 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// Columns left of the picture (-x bytes per row) ...
3063 for(y2= y; y2<y+block_w; y2++)
3064 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
// ... and right of the picture (x+block_w - w bytes per row).
3067 for(y2= y; y2<y+block_w; y2++)
3068 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[1] is used for block_w == 8, me_cmp[0] for block_w == 16.
3071 assert(block_w== 8 || block_w==16);
3072 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3076 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
// Non-zero when all four blocks of the group are identical.
3077 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3085 rate = get_block_bits(s, mb_x, mb_y, 2);
// When merged, the first four entries (the group itself) are skipped.
3086 for(i=merged?4:0; i<9; i++){
3087 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3088 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3091 return distortion + rate*penalty_factor;
/**
 * Try candidate parameters p for block (mb_x, mb_y) and evaluate them with
 * get_block_rd() on plane 0.
 *
 * The visible code stores p[0..2] as colours and sets BLOCK_INTRA, or clears
 * BLOCK_INTRA after a lookup in s->me_cache keyed on p[0], p[1] and
 * block->ref.
 *
 * NOTE(review): the test on `intra`, the statement taken on a cache hit, the
 * assignment of the motion vector, the comparison with *best_rd, the
 * restoring from `backup` and the return statements are absent from this
 * extract.
 */
3094 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3095 const int b_stride= s->b_width << s->block_max_depth;
3096 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3097 BlockNode backup= *block;
3098 int rd, index, value;
3100 assert(mb_x>=0 && mb_y>=0);
3101 assert(mb_x<b_stride);
// Candidate colours; the block is marked intra.
3104 block->color[0] = p[0];
3105 block->color[1] = p[1];
3106 block->color[2] = p[2];
3107 block->type |= BLOCK_INTRA;
// Cache slot and tag computed from p[0], p[1], block->ref and the current
// cache generation.
3109 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3110 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
// A matching tag means this candidate was already seen in this generation.
3111 if(s->me_cache[index] == value)
3113 s->me_cache[index]= value;
3117 block->type &= ~BLOCK_INTRA;
3120 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3132 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3133 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3134 int p[2] = {p0, p1};
3135 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one candidate shared by the four blocks whose top-left member is at
 * (mb_x, mb_y) and evaluate it with get_4block_rd() on plane 0.
 *
 * The four blocks are saved first; the visible code clears BLOCK_INTRA on the
 * first block, copies it over the other three, and later writes the saved
 * blocks back.
 *
 * NOTE(review): the statement taken on a cache hit, the assignment of
 * p0/p1/ref to the block, the comparison with *best_rd, the condition under
 * which the blocks are restored and the return statements are absent from
 * this extract.
 */
3138 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3139 const int b_stride= s->b_width << s->block_max_depth;
3140 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// Copies of the four blocks of the group: top-left, top-right, bottom-left,
// bottom-right.
3141 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3142 int rd, index, value;
3144 assert(mb_x>=0 && mb_y>=0);
3145 assert(mb_x<b_stride);
// Both coordinates must be even.
3146 assert(((mb_x|mb_y)&1) == 0);
// Cache slot and tag computed from p0, p1, block->ref and the current cache
// generation.
3148 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3149 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
// A matching tag means this candidate was already seen in this generation.
3150 if(s->me_cache[index] == value)
3152 s->me_cache[index]= value;
3157 block->type &= ~BLOCK_INTRA;
// Give all four blocks the same parameters.
3158 block[1]= block[b_stride]= block[b_stride+1]= *block;
3160 rd= get_4block_rd(s, mb_x, mb_y, 0);
// Write the saved blocks back.
3167 block[0]= backup[0];
3168 block[1]= backup[1];
3169 block[b_stride]= backup[2];
3170 block[b_stride+1]= backup[3];
3175 static void iterative_me(SnowContext *s){
3176 int pass, mb_x, mb_y;
3177 const int b_width = s->b_width << s->block_max_depth;
3178 const int b_height= s->b_height << s->block_max_depth;
3179 const int b_stride= b_width;
3183 RangeCoder r = s->c;
3184 uint8_t state[sizeof(s->block_state)];
3185 memcpy(state, s->block_state, sizeof(s->block_state));
3186 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3187 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3188 encode_q_branch(s, 0, mb_x, mb_y);
3190 memcpy(s->block_state, state, sizeof(s->block_state));
3193 for(pass=0; pass<25; pass++){
3196 for(mb_y= 0; mb_y<b_height; mb_y++){
3197 for(mb_x= 0; mb_x<b_width; mb_x++){
3198 int dia_change, i, j, ref;
3199 int best_rd= INT_MAX, ref_rd;
3200 BlockNode backup, ref_b;
3201 const int index= mb_x + mb_y * b_stride;
3202 BlockNode *block= &s->block[index];
3203 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3204 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3205 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3206 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3207 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3208 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3209 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3210 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3211 const int b_w= (MB_SIZE >> s->block_max_depth);
3212 uint8_t obmc_edged[b_w*2][b_w*2];
3214 if(pass && (block->type & BLOCK_OPT))
3216 block->type |= BLOCK_OPT;
3220 if(!s->me_cache_generation)
3221 memset(s->me_cache, 0, sizeof(s->me_cache));
3222 s->me_cache_generation += 1<<22;
3227 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3229 for(y=0; y<b_w*2; y++)
3230 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3231 if(mb_x==b_stride-1)
3232 for(y=0; y<b_w*2; y++)
3233 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3235 for(x=0; x<b_w*2; x++)
3236 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3237 for(y=1; y<b_w; y++)
3238 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3240 if(mb_y==b_height-1){
3241 for(x=0; x<b_w*2; x++)
3242 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3243 for(y=b_w; y<b_w*2-1; y++)
3244 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3248 //skip stuff outside the picture
3249 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3251 uint8_t *src= s-> input_picture.data[0];
3252 uint8_t *dst= s->current_picture.data[0];
3253 const int stride= s->current_picture.linesize[0];
3254 const int block_w= MB_SIZE >> s->block_max_depth;
3255 const int sx= block_w*mb_x - block_w/2;
3256 const int sy= block_w*mb_y - block_w/2;
3257 const int w= s->plane[0].width;
3258 const int h= s->plane[0].height;
3262 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3263 for(y=h; y<sy+block_w*2; y++)
3264 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3266 for(y=sy; y<sy+block_w*2; y++)
3267 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3269 if(sx+block_w*2 > w){
3270 for(y=sy; y<sy+block_w*2; y++)
3271 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3275 // intra(black) = neighbors' contribution to the current block
3277 color[i]= get_dc(s, mb_x, mb_y, i);
3279 // get previous score (can't be cached due to OBMC)
3280 if(pass > 0 && (block->type&BLOCK_INTRA)){
3281 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3282 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3284 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3288 for(ref=0; ref < s->ref_frames; ref++){
3289 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3290 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3295 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3296 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3298 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3300 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3302 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3304 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3307 //FIXME avoid subpel interpol / round to nearest integer
3310 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3312 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3313 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3314 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3315 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3321 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3324 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3326 //FIXME or try the standard 2 pass qpel or similar
3328 mvr[0][0]= block->mx;
3329 mvr[0][1]= block->my;
3330 if(ref_rd > best_rd){
3338 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3339 //FIXME RD style color selection
3341 if(!same_block(block, &backup)){
3342 if(tb ) tb ->type &= ~BLOCK_OPT;
3343 if(lb ) lb ->type &= ~BLOCK_OPT;
3344 if(rb ) rb ->type &= ~BLOCK_OPT;
3345 if(bb ) bb ->type &= ~BLOCK_OPT;
3346 if(tlb) tlb->type &= ~BLOCK_OPT;
3347 if(trb) trb->type &= ~BLOCK_OPT;
3348 if(blb) blb->type &= ~BLOCK_OPT;
3349 if(brb) brb->type &= ~BLOCK_OPT;
3354 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3359 if(s->block_max_depth == 1){
3361 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3362 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3364 int best_rd, init_rd;
3365 const int index= mb_x + mb_y * b_stride;
3368 b[0]= &s->block[index];
3370 b[2]= b[0]+b_stride;
3372 if(same_block(b[0], b[1]) &&
3373 same_block(b[0], b[2]) &&
3374 same_block(b[0], b[3]))
3377 if(!s->me_cache_generation)
3378 memset(s->me_cache, 0, sizeof(s->me_cache));
3379 s->me_cache_generation += 1<<22;
3381 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3383 //FIXME more multiref search?
3384 check_4block_inter(s, mb_x, mb_y,
3385 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3386 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3389 if(!(b[i]->type&BLOCK_INTRA))
3390 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3392 if(init_rd != best_rd)
3396 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantize the coefficients of one subband in place.
 *
 * The effective quantizer index is s->qlog + b->qlog clipped to
 * [0, QROOT*16]; the multiplier qmul is taken from qexp[] and scaled by a
 * left shift. Nothing is done when s->qlog equals LOSSLESS_QLOG.
 *
 * NOTE(review): the embedded line numbers in this listing skip, so the x/y
 * loops, the thres2 setup and the sign/variant branches are not visible here.
 *
 * @param s      codec context (qlog, spatial_decomposition_count are read)
 * @param b      subband; its level, width, height and qlog are read
 * @param src    coefficient buffer, read and overwritten at src[x + y*stride]
 * @param stride distance in elements between consecutive rows of src
 * @param bias   nonzero selects a rounding bias of 0, zero selects 3*qmul/8
 */
3400 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3401 const int level= b->level;
3402 const int w= b->width;
3403 const int h= b->height;
3404 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3405 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3406 int x,y, thres1, thres2;
// lossless mode: coefficients are left untouched
3409 if(s->qlog == LOSSLESS_QLOG) return;
// the incoming flag is replaced by the actual bias value used below
3411 bias= bias ? 0 : (3*qmul)>>3;
3412 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// first variant: the value is divided by qmul without adding the bias
3418 int i= src[x + y*stride];
// single unsigned range test against thres2 (assigned on a line not shown);
// values failing it are stored as 0 below
3420 if((unsigned)(i+thres1) > thres2){
3423 i/= qmul; //FIXME optimize
3424 src[x + y*stride]= i;
3428 i/= qmul; //FIXME optimize
3429 src[x + y*stride]= -i;
3432 src[x + y*stride]= 0;
// second variant: the bias is added before dividing by qmul
3438 int i= src[x + y*stride];
3440 if((unsigned)(i+thres1) > thres2){
3443 i= (i + bias) / qmul; //FIXME optimize
3444 src[x + y*stride]= i;
3448 i= (i + bias) / qmul; //FIXME optimize
3449 src[x + y*stride]= -i;
3452 src[x + y*stride]= 0;
// only the last decomposition level reaches the (disabled) timer below
3456 if(level+1 == s->spatial_decomposition_count){
3457 // STOP_TIMER("quantize")
/**
 * Dequantize rows [start_y, end_y) of a subband held in a slice buffer.
 *
 * Each row is fetched with slice_buffer_get_line() using the subband's
 * stride_line, buf_y_offset and buf_x_offset. Coefficients are multiplied
 * by qmul, offset by qadd and shifted down by QEXPSHIFT. Nothing is done
 * when s->qlog equals LOSSLESS_QLOG.
 *
 * NOTE(review): src and stride are only referenced by the commented-out
 * line in the visible code; the x loop and sign test are on lines not shown.
 *
 * @param s       codec context (qlog, qbias are read)
 * @param sb      slice buffer the rows are read from and written to
 * @param b       subband description
 * @param start_y first row to process
 * @param end_y   one past the last row to process
 */
3461 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3462 const int w= b->width;
3463 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3464 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// rounding offset derived from the context's qbias
3465 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3469 if(s->qlog == LOSSLESS_QLOG) return;
3471 for(y=start_y; y<end_y; y++){
3472 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3473 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// two reconstruction forms: this one negates the input and the result,
// the next one uses the value as is
3477 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3479 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// profiling hook, only taken for subbands wider than 200
3483 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3484 STOP_TIMER("dquant")
/**
 * Dequantize a whole subband in place in a plain coefficient buffer.
 *
 * Uses the same qmul/qadd derivation as the slice-buffered variant:
 * each coefficient is multiplied by qmul, offset by qadd and shifted down
 * by QEXPSHIFT. Nothing is done when s->qlog equals LOSSLESS_QLOG.
 *
 * NOTE(review): the x/y loops and the sign test are on lines not shown.
 *
 * @param s      codec context (qlog, qbias are read)
 * @param b      subband; width, height and qlog are read
 * @param src    coefficient buffer, accessed at src[x + y*stride]
 * @param stride distance in elements between consecutive rows of src
 */
3488 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3489 const int w= b->width;
3490 const int h= b->height;
3491 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3492 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3493 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3497 if(s->qlog == LOSSLESS_QLOG) return;
3501 int i= src[x + y*stride];
// this form negates the input and the result, the next one does not
3503 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3505 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// profiling hook, only taken for subbands wider than 200
3509 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3510 STOP_TIMER("dquant")
/**
 * Replace each coefficient of a subband by its difference to a prediction
 * built from already-known neighbours (left, top, top-right / top-left).
 *
 * The subband is walked from the bottom-right corner to the top-left one,
 * so the neighbours read at i-1 and i-stride still hold their original
 * values when they are used.
 *
 * NOTE(review): the conditions choosing between the three predictors
 * (presumably based on x and use_median) are on lines not shown here.
 *
 * @param src    coefficient buffer, modified in place
 * @param stride distance in elements between consecutive rows of src
 */
3514 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3515 const int w= b->width;
3516 const int h= b->height;
3519 for(y=h-1; y>=0; y--){
3520 for(x=w-1; x>=0; x--){
3521 int i= x + y*stride;
// median of left, top and top-right; falls back to left when there is no
// row above or no column to the right
3525 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3526 else src[i] -= src[i - 1];
// median of left, top and the gradient left + top - top-left
3528 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3529 else src[i] -= src[i - 1];
// predictor using only the sample above
3532 if(y) src[i] -= src[i - stride];
/**
 * Add the neighbour-based prediction back to rows [start_y, end_y) of a
 * subband held in a slice buffer.
 *
 * The additions mirror the subtractions in decorrelate(): the same
 * predictors are used with += instead of -=. Rows are fetched with
 * slice_buffer_get_line().
 *
 * NOTE(review): the x loop, the assignment of prev and the predictor
 * selection are on lines not shown here; src and stride only appear in the
 * commented-out line.
 */
3538 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3539 const int w= b->width;
3544 DWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// fetch the row just above the first processed row
3548 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3550 for(y=start_y; y<end_y; y++){
3552 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3553 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// median of left, top and top-right; left only on the first row or last column
3557 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3558 else line[x] += line[x - 1];
// median of left, top and the gradient left + top - top-left
3560 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3561 else line[x] += line[x - 1];
// predictor using only the sample above
3564 if(y) line[x] += prev[x];
3569 // STOP_TIMER("correlate")
/**
 * Add the neighbour-based prediction back to every coefficient of a
 * subband stored in a plain buffer (the += counterpart of decorrelate()).
 *
 * NOTE(review): the x/y loops and the predictor selection are on lines not
 * shown in this listing.
 *
 * @param src    coefficient buffer, modified in place
 * @param stride distance in elements between consecutive rows of src
 */
3572 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3573 const int w= b->width;
3574 const int h= b->height;
3579 int i= x + y*stride;
// median of left, top and top-right; left only on the first row or last column
3583 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3584 else src[i] += src[i - 1];
// median of left, top and the gradient left + top - top-left
3586 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3587 else src[i] += src[i - 1];
// predictor using only the sample above
3590 if(y) src[i] += src[i - stride];
/**
 * Write the frame header to the range coder in s->c.
 *
 * The keyframe flag is coded first with its own state (kstate). The visible
 * code then writes the stream parameters (version, decomposition settings,
 * colorspace, chroma shifts, scalability flag, max_ref_frames-1) and the
 * per-band qlog values, followed by per-frame parameters coded as signed
 * differences against the last_* fields, which are updated afterwards.
 *
 * NOTE(review): the declaration of kstate and the condition guarding the
 * stream-parameter section are on lines not shown in this listing.
 */
3596 static void encode_header(SnowContext *s){
3597 int plane_index, level, orientation;
3600 memset(kstate, MID_STATE, sizeof(kstate));
3602 put_rac(&s->c, kstate, s->keyframe);
// on keyframes (or when always_reset is set) the delta baselines are reset
3603 if(s->keyframe || s->always_reset){
3605 s->last_spatial_decomposition_type=
3609 s->last_block_max_depth= 0;
3612 put_symbol(&s->c, s->header_state, s->version, 0);
3613 put_rac(&s->c, s->header_state, s->always_reset);
3614 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3615 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3616 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3617 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3618 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3619 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3620 put_rac(&s->c, s->header_state, s->spatial_scalability);
3621 // put_rac(&s->c, s->header_state, s->rate_scalability);
3622 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// only planes 0 and 1 are written, and orientation 2 is skipped;
// decode_header() fills those entries from plane 1 / orientation 1
3624 for(plane_index=0; plane_index<2; plane_index++){
3625 for(level=0; level<s->spatial_decomposition_count; level++){
3626 for(orientation=level ? 1:0; orientation<4; orientation++){
3627 if(orientation==2) continue;
3628 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// per-frame parameters, coded as signed deltas against the previous values
3633 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3634 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3635 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3636 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3637 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// remember the values just written as the baseline for the next header
3639 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3640 s->last_qlog = s->qlog;
3641 s->last_qbias = s->qbias;
3642 s->last_mv_scale = s->mv_scale;
3643 s->last_block_max_depth = s->block_max_depth;
/**
 * Read the frame header from the range coder in s->c into the context.
 *
 * Mirrors encode_header(): the keyframe flag is read with its own state,
 * then the stream parameters and per-band qlog values, then the per-frame
 * parameters, which are coded as signed deltas and accumulated onto the
 * current field values.
 *
 * NOTE(review): the return statements, the version check condition and the
 * condition guarding the stream-parameter section are on lines not shown;
 * presumably a negative value is returned after the error logs.
 */
3646 static int decode_header(SnowContext *s){
3647 int plane_index, level, orientation;
3650 memset(kstate, MID_STATE, sizeof(kstate));
3652 s->keyframe= get_rac(&s->c, kstate);
// on keyframes (or when always_reset is set) the delta baselines are reset
3653 if(s->keyframe || s->always_reset){
3655 s->spatial_decomposition_type=
3659 s->block_max_depth= 0;
3662 s->version= get_symbol(&s->c, s->header_state, 0);
3664 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3667 s->always_reset= get_rac(&s->c, s->header_state);
3668 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3669 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3670 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3671 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3672 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3673 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3674 s->spatial_scalability= get_rac(&s->c, s->header_state);
3675 // s->rate_scalability= get_rac(&s->c, s->header_state);
3676 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// all three planes are filled: plane 2 copies plane 1, orientation 2 copies
// orientation 1 of the same level, everything else is read from the stream
3678 for(plane_index=0; plane_index<3; plane_index++){
3679 for(level=0; level<s->spatial_decomposition_count; level++){
3680 for(orientation=level ? 1:0; orientation<4; orientation++){
3682 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3683 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3684 else q= get_symbol(&s->c, s->header_state, 1);
3685 s->plane[plane_index].band[level][orientation].qlog= q;
// per-frame parameters arrive as signed deltas and are accumulated
3691 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3692 if(s->spatial_decomposition_type > 2){
3693 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3697 s->qlog += get_symbol(&s->c, s->header_state, 1);
3698 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3699 s->qbias += get_symbol(&s->c, s->header_state, 1);
3700 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// only depths 0 and 1 are accepted; note the message is also logged for
// negative values; the field is reset to 0 on rejection
3701 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3702 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3703 s->block_max_depth= 0;
/**
 * Initialise the quantizer multiplier table used by the (de)quantizers.
 *
 * The visible loop runs QROOT times and multiplies v by 2^(1/QROOT) on each
 * iteration, i.e. v doubles over one full pass.
 *
 * NOTE(review): the declaration/initial value of v and the store into the
 * table are on lines not shown in this listing.
 */
3710 static void init_qexp(void){
3714 for(i=0; i<QROOT; i++){
3716 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation shared by the encoder and the decoder.
 *
 * Sets up the DSP function tables, fixes the decomposition count to 5 and
 * the chroma shifts to 1, allocates the width*height DWT buffer, derives
 * mv_scale and block_max_depth from the codec flags, fills in the geometry
 * of every subband of every plane, builds the scale_mv_ref table and
 * requests a buffer for mconly_picture.
 *
 * NOTE(review): several lines (macro headers and invocations, some
 * conditions, the return statement) are not shown in this listing. The
 * results of av_mallocz() and get_buffer() are not checked in the visible
 * code.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 */
3720 static int common_init(AVCodecContext *avctx){
3721 SnowContext *s = avctx->priv_data;
3723 int level, orientation, plane_index, dec;
// generic DSP setup; the macro bodies below then overwrite the qpel and
// half-pel put tables
3728 dsputil_init(&s->dsp, avctx);
3731 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3732 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3733 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3734 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3735 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3736 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3755 #define mcfh(dx,dy)\
3756 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3757 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3758 mc_block_hpel ## dx ## dy ## 16;\
3759 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3760 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3761 mc_block_hpel ## dx ## dy ## 8;
// fixed number of decomposition levels; the wavelet type comes from the
// user-supplied prediction_method
3771 dec= s->spatial_decomposition_count= 5;
3772 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type or transform_type
3774 s->chroma_h_shift= 1; //FIXME XXX
3775 s->chroma_v_shift= 1;
3777 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3779 width= s->avctx->width;
3780 height= s->avctx->height;
// one zero-initialised DWT coefficient per luma pixel
3782 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3784 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3785 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3787 for(plane_index=0; plane_index<3; plane_index++){
3788 int w= s->avctx->width;
3789 int h= s->avctx->height;
// plane dimensions reduced by the chroma shifts (the guarding condition is
// on a line not shown)
3792 w>>= s->chroma_h_shift;
3793 h>>= s->chroma_v_shift;
3795 s->plane[plane_index].width = w;
3796 s->plane[plane_index].height= h;
3797 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// walk from the finest level down; orientation 0 only exists at level 0
3798 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3799 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3800 SubBand *b= &s->plane[plane_index].band[level][orientation];
// every band points into the shared DWT buffer
3802 b->buf= s->spatial_dwt_buffer;
3804 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// band size is half the current size, rounded up for even orientations
// (width) and for orientations 0/1 (height)
3805 b->width = (w + !(orientation&1))>>1;
3806 b->height= (h + !(orientation>1))>>1;
3808 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3809 b->buf_x_offset = 0;
3810 b->buf_y_offset = 0;
// offsets applied for some orientations (conditions not shown)
3814 b->buf_x_offset = (w+1)>>1;
3817 b->buf += b->stride>>1;
3818 b->buf_y_offset = b->stride_line >> 1;
// parent is the same orientation one level coarser (guard not shown)
3822 b->parent= &s->plane[plane_index].band[level-1][orientation];
3823 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// table of 256*(i+1)/(j+1) for every pair of reference indices
3830 for(i=0; i<MAX_REF_FRAMES; i++)
3831 for(j=0; j<MAX_REF_FRAMES; j++)
3832 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3836 width= s->width= avctx->width;
3837 height= s->height= avctx->height;
3839 assert(width && height);
3841 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Convert a lambda-scaled quality value into the codec's qlog scale.
 *
 * Computes QROOT * log2(qscale / FF_QP2LAMBDA), rounded with rint(), and
 * adds the constant offset 61*QROOT/8.
 *
 * @param qscale quality value in FF_QP2LAMBDA units
 * @return the corresponding qlog value
 */
3846 static int qscale2qlog(int qscale){
3847 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3848 + 61*QROOT/8; //<64 >60
/**
 * One-pass rate control: estimate the frame complexity from the luma DWT
 * coefficients, ask the rate controller for a quality and update s->qlog
 * and s->lambda accordingly.
 *
 * NOTE(review): the x/y loops and the return statements are on lines not
 * shown; presumably delta_qlog is returned on success and a special value
 * when ff_rate_estimate_qscale() reports a negative quality.
 *
 * @param s    codec context; plane 0 subbands are read, qlog/lambda written
 * @param pict frame being coded; pict_type is read, quality is written
 */
3851 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3853 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3854 * FIXME we know exact mv bits at this point,
3855 * but ratecontrol isn't set up to include them. */
3856 uint32_t coef_sum= 0;
3857 int level, orientation, delta_qlog;
3859 for(level=0; level<s->spatial_decomposition_count; level++){
3860 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3861 SubBand *b= &s->plane[0].band[level][orientation];
3862 DWTELEM *buf= b->buf;
3863 const int w= b->width;
3864 const int h= b->height;
3865 const int stride= b->stride;
// weight uses a fixed base of 2*QROOT plus the band's own qlog
3866 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3867 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// reciprocal of qmul in 16.16 fixed point
3868 const int qdiv= (1<<16)/qmul;
// the band is decorrelated for the measurement and restored afterwards
// by the matching correlate() call (guarding conditions not shown)
3871 decorrelate(s, b, buf, stride, 1, 0);
3874 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3876 correlate(s, b, buf, stride, 1, 0);
3880 /* ugly, ratecontrol just takes a sqrt again */
3881 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3882 assert(coef_sum < INT_MAX);
// the complexity is reported as intra variance for I frames and as
// motion-compensated variance otherwise
3884 if(pict->pict_type == I_TYPE){
3885 s->m.current_picture.mb_var_sum= coef_sum;
3886 s->m.current_picture.mc_mb_var_sum= 0;
3888 s->m.current_picture.mc_mb_var_sum= coef_sum;
3889 s->m.current_picture.mb_var_sum= 0;
3892 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3893 if (pict->quality < 0)
3895 s->lambda= pict->quality * 3/2;
// s->qlog ends up equal to qscale2qlog(pict->quality)
3896 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3897 s->qlog+= delta_qlog;
/**
 * Derive a qlog value for every subband of plane p from the response of
 * the inverse DWT to a single coefficient in that band.
 *
 * For each band the DWT buffer is cleared, one coefficient near the band
 * centre is set to 256*256, the inverse transform is run and the output is
 * scanned. b->qlog is then set to log(352256/sqrt(error)) expressed in
 * steps of 2^(1/QROOT), rounded by adding 0.5 and truncating.
 *
 * NOTE(review): the declaration and accumulation of error are on lines not
 * shown. The memset sizes the buffer with sizeof(int) while other clears of
 * this buffer in this file use sizeof(DWTELEM); confirm the two agree.
 */
3901 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3902 int width = p->width;
3903 int height= p->height;
3904 int level, orientation, x, y;
3906 for(level=0; level<s->spatial_decomposition_count; level++){
3907 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3908 SubBand *b= &p->band[level][orientation];
3909 DWTELEM *buf= b->buf;
3912 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
// single impulse in the middle of this band
3913 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3914 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3915 for(y=0; y<height; y++){
3916 for(x=0; x<width; x++){
3917 int64_t d= s->spatial_dwt_buffer[x + y*width];
3922 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3923 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation.
 *
 * Rejects use unless strict_std_compliance allows experimental codecs,
 * rejects the 9/7 wavelet combined with QSCALE and global_quality 0,
 * allocates the motion-estimation scratch buffers, initialises rate
 * control when needed, computes per-band weights for all three planes,
 * checks the pixel format and allocates the per-reference motion vector
 * and score arrays for iterative motion estimation.
 *
 * NOTE(review): return statements and some calls are on lines not shown in
 * this listing. The av_mallocz() and get_buffer() results are not checked
 * in the visible code.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 */
3928 static int encode_init(AVCodecContext *avctx)
3930 SnowContext *s = avctx->priv_data;
3933 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3934 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3935 "use vstrict=-2 / -strict -2 to use it anyway\n");
// QSCALE with global_quality 0 is the lossless configuration named in the
// message below
3939 if(avctx->prediction_method == DWT_97
3940 && (avctx->flags & CODEC_FLAG_QSCALE)
3941 && avctx->global_quality == 0){
3942 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3952 s->m.flags = avctx->flags;
3953 s->m.bit_rate= avctx->bit_rate;
// scratch buffers used by the embedded motion-estimation context
3955 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3956 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3957 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3958 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3959 h263_encode_init(&s->m); //mv_penalty
// clamp the requested reference count to [1, MAX_REF_FRAMES]
3961 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3963 if(avctx->flags&CODEC_FLAG_PASS1){
3964 if(!avctx->stats_out)
3965 avctx->stats_out = av_mallocz(256);
// rate control is needed for the second pass and whenever no fixed
// quantiser was requested
3967 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3968 if(ff_rate_control_init(&s->m) < 0)
// one-pass rate control: neither a fixed quantiser nor second-pass data
3971 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3973 for(plane_index=0; plane_index<3; plane_index++){
3974 calculate_vissual_weight(s, &s->plane[plane_index]);
3978 avctx->coded_frame= &s->current_picture;
// only PIX_FMT_YUV420P is enabled; the other cases are commented out
3979 switch(avctx->pix_fmt){
3980 // case PIX_FMT_YUV444P:
3981 // case PIX_FMT_YUV422P:
3982 case PIX_FMT_YUV420P:
3984 // case PIX_FMT_YUV411P:
3985 // case PIX_FMT_YUV410P:
3986 s->colorspace_type= 0;
3988 /* case PIX_FMT_RGB32:
3992 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3995 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3996 s->chroma_h_shift= 1;
3997 s->chroma_v_shift= 1;
3999 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4000 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4002 s->avctx->get_buffer(s->avctx, &s->input_picture);
// iterative ME keeps one motion vector and one score per block and
// reference frame
4004 if(s->avctx->me_method == ME_ITER){
4006 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4007 for(i=0; i<s->max_ref_frames; i++){
4008 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4009 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Prepare the picture buffers for coding or decoding the next frame.
//
// If a current picture exists, its three planes get their edges extended
// with draw_edges(). The reference list is then rotated: the oldest entry
// is taken out, the others move up by one, the current picture becomes
// last_picture[0] and the removed entry is reused as current_picture,
// for which a fresh buffer is requested through get_buffer().
//
// NOTE(review): the declarations of tmp and i, the body of the loop over the
// reference list and the return statements are on lines not shown in this
// listing; presumably a negative value is returned when get_buffer() fails.
4016 static int frame_start(SnowContext *s){
4018 int w= s->avctx->width; //FIXME round up to x16 ?
4019 int h= s->avctx->height;
4021 if(s->current_picture.data[0]){
// chroma planes use half the size and half the edge width
4022 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4023 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4024 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// rotate the reference list by one position
4027 tmp= s->last_picture[s->max_ref_frames-1];
4028 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4029 s->last_picture[0]= s->current_picture;
4030 s->current_picture= tmp;
// scan the populated references; the test looks for a keyframe at the
// preceding index (the action taken is not shown)
4036 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4037 if(i && s->last_picture[i-1].key_frame)
4042 s->current_picture.reference= 1;
4043 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4044 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4048 s->current_picture.key_frame= s->keyframe;
// Encode one frame into buf and return the result of ff_rac_terminate().
//
// Visible steps: initialise the range coder, copy the input picture,
// decide the picture type and quality, set up the embedded motion
// estimation context for P frames, code the block data, then for each of
// the three planes run prediction, forward DWT, quantisation and subband
// coding followed by the reconstruction (dequantise, inverse DWT,
// prediction add-back). Finally statistics are stored and the oldest
// reference picture is released.
//
// NOTE(review): many lines of this function (loops over x/y, several
// conditions, else branches, labels and returns) are not shown in this
// listing; comments below describe only what the visible lines do.
//
// avctx    - codec context; avctx->priv_data is the SnowContext
// buf      - output buffer handed to the range encoder
// buf_size - size of buf
// data     - input frame, used as an AVFrame
4053 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4054 SnowContext *s = avctx->priv_data;
4055 RangeCoder * const c= &s->c;
4056 AVFrame *pict = data;
4057 const int width= s->avctx->width;
4058 const int height= s->avctx->height;
4059 int level, orientation, plane_index, i, y;
// backups of the coder states, restored after one-pass rate control
4060 uint8_t rc_header_bak[sizeof(s->header_state)];
4061 uint8_t rc_block_bak[sizeof(s->block_state)];
4063 ff_init_range_encoder(c, buf, buf_size);
4064 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// copy the caller's picture row by row into input_picture
4068 for(y=0; y<(height>>shift); y++)
4069 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4070 &pict->data[i][y * pict->linesize[i]],
4073 s->new_picture = *pict;
4075 s->m.picture_number= avctx->frame_number;
// second pass: picture type comes from the stored first-pass entry
4076 if(avctx->flags&CODEC_FLAG_PASS2){
4078 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4079 s->keyframe= pict->pict_type==FF_I_TYPE;
4080 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4081 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4082 if (pict->quality < 0)
// GOP-based decision: keyframe when gop_size is 0 or on every gop_size-th frame
4086 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4088 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// one-pass rate control has no history on the very first frame
4091 if(s->pass1_rc && avctx->frame_number == 0)
4092 pict->quality= 2*FF_QP2LAMBDA;
4094 s->qlog= qscale2qlog(pict->quality);
4095 s->lambda = pict->quality * 3/2;
// negative qlog, or quality 0 with a fixed quantiser, selects lossless mode
4097 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4098 s->qlog= LOSSLESS_QLOG;
4100 }//else keep previous frame's qlog until after motion est
4104 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: fill in the embedded context used by the motion estimation code
4105 if(pict->pict_type == P_TYPE){
// size in 16x16 units, rounded up
4106 int block_width = (width +15)>>4;
4107 int block_height= (height+15)>>4;
4108 int stride= s->current_picture.linesize[0];
4110 assert(s->current_picture.data[0]);
4111 assert(s->last_picture[0].data[0]);
4113 s->m.avctx= s->avctx;
4114 s->m.current_picture.data[0]= s->current_picture.data[0];
4115 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4116 s->m. new_picture.data[0]= s-> input_picture.data[0];
4117 s->m. last_picture_ptr= &s->m. last_picture;
4119 s->m. last_picture.linesize[0]=
4120 s->m. new_picture.linesize[0]=
4121 s->m.current_picture.linesize[0]= stride;
4122 s->m.uvlinesize= s->current_picture.linesize[1];
4124 s->m.height= height;
4125 s->m.mb_width = block_width;
4126 s->m.mb_height= block_height;
4127 s->m.mb_stride= s->m.mb_width+1;
4128 s->m.b8_stride= 2*s->m.mb_width+1;
4130 s->m.pict_type= pict->pict_type;
4131 s->m.me_method= s->avctx->me_method;
4132 s->m.me.scene_change_score=0;
4133 s->m.flags= s->avctx->flags;
4134 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4135 s->m.out_format= FMT_H263;
4136 s->m.unrestricted_mv= 1;
4138 s->m.lambda = s->lambda;
4139 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4140 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4142 s->m.dsp= s->dsp; //move
// save the coder states so they can be restored if the header and blocks
// have to be coded again after one-pass rate control
4148 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4149 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4154 s->m.pict_type = pict->pict_type;
4155 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// bits written so far count as misc bits; bits added by encode_blocks()
// count as motion vector bits
4158 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4159 encode_blocks(s, 1);
4160 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4162 for(plane_index=0; plane_index<3; plane_index++){
4163 Plane *p= &s->plane[plane_index];
4167 // int bits= put_bits_count(&s->c.pb);
4169 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
// load the input samples into the DWT buffer, scaled up by FRAC_BITS
4171 if(pict->data[plane_index]) //FIXME gray hack
4174 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4177 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// scene change on a P frame outside the second pass: restart the range
// coder and switch the frame to an I frame
4180 && pict->pict_type == P_TYPE
4181 && !(avctx->flags&CODEC_FLAG_PASS2)
4182 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4183 ff_init_range_encoder(c, buf, buf_size);
4184 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4185 pict->pict_type= FF_I_TYPE;
4187 s->current_picture.key_frame=1;
// lossless: drop the fractional bits with rounding before the transform
4191 if(s->qlog == LOSSLESS_QLOG){
4194 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4199 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// one-pass rate control runs once, on the luma plane, after the transform
4201 if(s->pass1_rc && plane_index==0){
4202 int delta_qlog = ratecontrol_1pass(s, pict);
4203 if (delta_qlog <= INT_MIN)
4206 //reordering qlog in the bitstream would eliminate this reset
4207 ff_init_range_encoder(c, buf, buf_size);
4208 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4209 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4211 encode_blocks(s, 0);
// quantise, decorrelate and code every subband of this plane
4215 for(level=0; level<s->spatial_decomposition_count; level++){
4216 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4217 SubBand *b= &p->band[level][orientation];
4219 quantize(s, b, b->buf, b->stride, s->qbias);
4221 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4222 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4223 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4225 correlate(s, b, b->buf, b->stride, 1, 0);
4228 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// reconstruction: dequantise every subband, inverse transform, add prediction
4230 for(level=0; level<s->spatial_decomposition_count; level++){
4231 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4232 SubBand *b= &p->band[level][orientation];
4234 dequantize(s, b, b->buf, b->stride);
4238 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// lossless: restore the fractional bits removed before the transform
4239 if(s->qlog == LOSSLESS_QLOG){
4242 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4247 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4248 STOP_TIMER("pred-conv")}
// I frames: input samples are copied straight into current_picture; the
// two lines after the copy clear the buffer and run predict_plane()
// (the surrounding branch structure is not fully visible)
4251 if(pict->pict_type == I_TYPE){
4254 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4255 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4259 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4260 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
// optional PSNR bookkeeping: per-plane error between reconstruction and input
4263 if(s->avctx->flags&CODEC_FLAG_PSNR){
4266 if(pict->data[plane_index]) //FIXME gray hack
4269 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4273 s->avctx->error[plane_index] += error;
4274 s->current_picture.error[plane_index] = error;
// the oldest reference is no longer needed
4278 if(s->last_picture[s->max_ref_frames-1].data[0])
4279 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// per-frame statistics for the caller and the rate controller
4281 s->current_picture.coded_picture_number = avctx->frame_number;
4282 s->current_picture.pict_type = pict->pict_type;
4283 s->current_picture.quality = pict->quality;
4284 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4285 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4286 s->m.current_picture.display_picture_number =
4287 s->m.current_picture.coded_picture_number = avctx->frame_number;
4288 s->m.current_picture.quality = pict->quality;
4289 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4291 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4293 if(avctx->flags&CODEC_FLAG_PASS1)
4294 ff_write_pass1_stats(&s->m);
4295 s->m.last_pict_type = s->m.pict_type;
4296 avctx->frame_bits = s->m.frame_bits;
4297 avctx->mv_bits = s->m.mv_bits;
4298 avctx->misc_bits = s->m.misc_bits;
4299 avctx->p_tex_bits = s->m.p_tex_bits;
4303 return ff_rac_terminate(c);
// Release everything owned by the SnowContext: the DWT buffer, the motion
// estimation scratch buffers, the block array, the per-reference motion
// vector/score arrays, any reference pictures still holding data and the
// x_coeff array of every subband.
//
// av_freep() is used throughout, so the freed pointers are cleared as well.
4306 static void common_end(SnowContext *s){
4307 int plane_index, level, orientation, i;
4309 av_freep(&s->spatial_dwt_buffer);
4311 av_freep(&s->m.me.scratchpad);
4312 av_freep(&s->m.me.map);
4313 av_freep(&s->m.me.score_map);
4314 av_freep(&s->m.obmc_scratchpad);
4316 av_freep(&s->block);
// all MAX_REF_FRAMES slots are visited, not just max_ref_frames
4318 for(i=0; i<MAX_REF_FRAMES; i++){
4319 av_freep(&s->ref_mvs[i]);
4320 av_freep(&s->ref_scores[i]);
// only pictures that actually hold a buffer are released
4321 if(s->last_picture[i].data[0])
4322 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// same band iteration order as the allocation loop in common_init()
4325 for(plane_index=0; plane_index<3; plane_index++){
4326 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4327 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4328 SubBand *b= &s->plane[plane_index].band[level][orientation];
4330 av_freep(&b->x_coeff);
// Encoder teardown. The visible code frees avctx->stats_out.
//
// NOTE(review): the remaining statements (presumably the shared cleanup and
// the return) are on lines not shown in this listing.
4336 static int encode_end(AVCodecContext *avctx)
4338 SnowContext *s = avctx->priv_data;
4341 av_free(avctx->stats_out);
// Decoder initialisation: fixes the output pixel format to
// PIX_FMT_YUV420P and sets up the slice buffer used by the buffered
// inverse DWT.
//
// NOTE(review): the declaration of block_size, any shared initialisation
// call and the return statement are on lines not shown in this listing.
4346 static int decode_init(AVCodecContext *avctx)
4348 SnowContext *s = avctx->priv_data;
4351 avctx->pix_fmt= PIX_FMT_YUV420P;
// block size in pixels at the current maximum block depth
4355 block_size = MB_SIZE >> s->block_max_depth;
// the line count handed to the slice buffer grows with the block size and
// with the number of decomposition levels
4356 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4361 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4362 SnowContext *s = avctx->priv_data;
4363 RangeCoder * const c= &s->c;
4365 AVFrame *picture = data;
4366 int level, orientation, plane_index;
4368 ff_init_range_decoder(c, buf, buf_size);
4369 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4371 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4373 if(!s->block) alloc_blocks(s);
4376 //keyframe flag duplication mess FIXME
4377 if(avctx->debug&FF_DEBUG_PICT_INFO)
4378 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4382 for(plane_index=0; plane_index<3; plane_index++){
4383 Plane *p= &s->plane[plane_index];
4387 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4389 if(s->avctx->debug&2048){
4390 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4391 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4395 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4396 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4402 for(level=0; level<s->spatial_decomposition_count; level++){
4403 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4404 SubBand *b= &p->band[level][orientation];
4405 unpack_coeffs(s, b, b->parent, orientation);
4408 STOP_TIMER("unpack coeffs");
4412 const int mb_h= s->b_height << s->block_max_depth;
4413 const int block_size = MB_SIZE >> s->block_max_depth;
4414 const int block_w = plane_index ? block_size/2 : block_size;
4416 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4421 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4422 for(mb_y=0; mb_y<=mb_h; mb_y++){
4424 int slice_starty = block_w*mb_y;
4425 int slice_h = block_w*(mb_y+1);
4426 if (!(s->keyframe || s->avctx->debug&512)){
4427 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4428 slice_h -= (block_w >> 1);
4433 for(level=0; level<s->spatial_decomposition_count; level++){
4434 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4435 SubBand *b= &p->band[level][orientation];
4438 int our_mb_start = mb_y;
4439 int our_mb_end = (mb_y + 1);
4441 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4442 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4443 if (!(s->keyframe || s->avctx->debug&512)){
4444 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4445 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4447 start_y = FFMIN(b->height, start_y);
4448 end_y = FFMIN(b->height, end_y);
4450 if (start_y != end_y){
4451 if (orientation == 0){
4452 SubBand * correlate_band = &p->band[0][0];
4453 int correlate_end_y = FFMIN(b->height, end_y + 1);
4454 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4455 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4456 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4457 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4460 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4464 STOP_TIMER("decode_subband_slice");
4468 for(; yd<slice_h; yd+=4){
4469 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4471 STOP_TIMER("idwt slice");}
4474 if(s->qlog == LOSSLESS_QLOG){
4475 for(; yq<slice_h && yq<h; yq++){
4476 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4478 line[x] <<= FRAC_BITS;
4483 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4485 y = FFMIN(p->height, slice_starty);
4486 end_y = FFMIN(p->height, slice_h);
4488 slice_buffer_release(&s->sb, y++);
4491 slice_buffer_flush(&s->sb);
4493 STOP_TIMER("idwt + predict_slices")}
4498 if(s->last_picture[s->max_ref_frames-1].data[0])
4499 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4501 if(!(s->avctx->debug&2048))
4502 *picture= s->current_picture;
4504 *picture= s->mconly_picture;
4506 *data_size = sizeof(AVFrame);
4508 bytes_read= c->bytestream - c->bytestream_start;
4509 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown: destroys the slice buffer kept in the SnowContext
 * that avctx->priv_data points to.
 */
4514 static int decode_end(AVCodecContext *avctx)
4516 SnowContext *s = avctx->priv_data;
/* Tear down the slice buffer used by decode_frame(). */
4518 slice_buffer_destroy(&s->sb);
/* Codec table entry for the Snow decoder (positional AVCodec initializer). */
4525 AVCodec snow_decoder = {
/* presumably the private-context size; decode_frame() reads avctx->priv_data as a SnowContext -- TODO confirm field position */
4529 sizeof(SnowContext),
/* No capability flags are enabled; DR1 and DRAW_HORIZ_BAND are left commented out. */
4534 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* The encoder entry is only compiled when CONFIG_ENCODERS is defined. */
4538 #ifdef CONFIG_ENCODERS
/* Codec table entry for the Snow encoder (positional AVCodec initializer). */
4539 AVCodec snow_encoder = {
/* presumably the private-context size, as in snow_decoder -- TODO confirm field position */
4543 sizeof(SnowContext),
/* Self-test body: DWT round trips, a range-coder round trip, and two
 * experiments that print inverse-DWT responses.
 * buffer[0] is the working copy that gets transformed in place,
 * buffer[1] keeps the untouched values for comparison. */
4559 int buffer[2][width*height];
4562 s.spatial_decomposition_count=6;
4563 s.spatial_decomposition_type=1;
/* Decomposition type 1: forward + inverse transform must reproduce the
 * random input exactly; every differing sample is printed. */
4565 printf("testing 5/3 DWT\n");
4566 for(i=0; i<width*height; i++)
4567 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4569 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4570 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4572 for(i=0; i<width*height; i++)
4573 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
/* Decomposition type 0: the round trip is only checked to within an
 * absolute difference of 20 per sample. */
4575 printf("testing 9/7 DWT\n");
4576 s.spatial_decomposition_type=0;
4577 for(i=0; i<width*height; i++)
4578 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4580 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4581 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4583 for(i=0; i<width*height; i++)
4584 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
/* Range coder round trip: write the values i*i*i/3*FFABS(i) for i in
 * [-256, 256) with put_symbol(), then read them back with get_symbol().
 * NOTE(review): the coder is set up with ff_init_cabac_states() here while
 * decode_frame() uses ff_build_rac_states(), and buffer[0] is an int array
 * used as the coder's byte buffer -- confirm this still builds and is intended. */
4587 printf("testing AC coder\n");
4588 memset(s.header_state, 0, sizeof(s.header_state));
4589 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4590 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4592 for(i=-256; i<256; i++){
4594 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4595 STOP_TIMER("put_symbol")
4597 ff_rac_terminate(&s.c);
/* Reset the header state and decode from the same buffer. */
4599 memset(s.header_state, 0, sizeof(s.header_state));
4600 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4601 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4603 for(i=-256; i<256; i++){
4606 j= get_symbol(&s.c, s.header_state, 1);
4607 STOP_TIMER("get_symbol")
/* Any decoded value that differs from the one written is reported. */
4608 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
/* Per-subband response measurement: with 3 decomposition levels of type 0,
 * a single coefficient of 256*256 is placed in one subband of an otherwise
 * zero buffer, the inverse DWT is run, and a rounded square root is stored
 * in errors[level][orientation]. */
4612 int level, orientation, x, y;
4613 int64_t errors[8][4];
4616 memset(errors, 0, sizeof(errors));
4617 s.spatial_decomposition_count=3;
4618 s.spatial_decomposition_type=0;
4619 for(level=0; level<s.spatial_decomposition_count; level++){
4620 for(orientation=level ? 1 : 0; orientation<4; orientation++){
/* Size and row stride of the subband at this level inside buffer[0]. */
4621 int w= width >> (s.spatial_decomposition_count-level);
4622 int h= height >> (s.spatial_decomposition_count-level);
4623 int stride= width << (s.spatial_decomposition_count-level);
4624 DWTELEM *buf= buffer[0];
/* Odd orientations start w samples to the right, orientations 2 and 3
 * start half a stride further on. */
4627 if(orientation&1) buf+=w;
4628 if(orientation>1) buf+=stride>>1;
4630 memset(buffer[0], 0, sizeof(int)*width*height);
/* Single non-zero coefficient in the middle of the subband. */
4631 buf[w/2 + h/2*stride]= 256*256;
4632 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4633 for(y=0; y<height; y++){
4634 for(x=0; x<width; x++){
4635 int64_t d= buffer[0][x + y*width];
/* For level 2 only, print the samples within 8 of the image centre. */
4637 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4639 if(FFABS(height/2-y)<9 && level==2) printf("\n");
/* Round the square root to the nearest integer, store it, and fold it into
 * the running gcd g (only while g is non-zero). */
4641 error= (int)(sqrt(error)+0.5);
4642 errors[level][orientation]= error;
4643 if(g) g=ff_gcd(g, error);
/* Print the results, divided by g, as C source for a visual_weight table. */
4647 printf("static int const visual_weight[][4]={\n");
4648 for(level=0; level<s.spatial_decomposition_count; level++){
4650 for(orientation=0; orientation<4; orientation++){
4651 printf("%8"PRId64",", errors[level][orientation]/g);
/* Second experiment: same subband geometry as above. */
4659 int w= width >> (s.spatial_decomposition_count-level);
4660 int h= height >> (s.spatial_decomposition_count-level);
4661 int stride= width << (s.spatial_decomposition_count-level);
4662 DWTELEM *buf= buffer[0];
4668 memset(buffer[0], 0, sizeof(int)*width*height);
/* Fill the image with a repeating 2x2 pattern taken from tab[], scaled by 256*256. */
4670 for(y=0; y<height; y++){
4671 for(x=0; x<width; x++){
4672 int tab[4]={0,2,3,1};
4673 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4676 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
/* Overwrite coefficients through buf with the constants 169 and 64
 * (the second one w samples to the left of the first). */
4680 buf[x + y*stride ]=169;
4681 buf[x + y*stride-w]=64;
4684 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
/* Print the reconstructed samples within 8 of the image centre. */
4686 for(y=0; y<height; y++){
4687 for(x=0; x<width; x++){
4688 int64_t d= buffer[0][x + y*width];
4690 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4692 if(FFABS(height/2-y)<9) printf("\n");