1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 /* ////////////////////////////////////////////////////////////////////
45 // Geometrical transforms on images and matrices: rotation, zoom etc.
54 /************** interpolation formulas and tables ***************/
56 const int INTER_RESIZE_COEF_BITS=11;
57 const int INTER_RESIZE_COEF_SCALE=1 << INTER_RESIZE_COEF_BITS;
59 const int INTER_REMAP_COEF_BITS=15;
60 const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;
62 static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];
64 static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
65 static short BilinearTab_i[INTER_TAB_SIZE2][2][2];
68 static short CV_DECL_ALIGNED(16) BilinearTab_iC4[INTER_TAB_SIZE2][2][8];
71 static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
72 static short BicubicTab_i[INTER_TAB_SIZE2][4][4];
74 static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
75 static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
77 static inline void interpolateLinear( float x, float* coeffs )
83 static inline void interpolateCubic( float x, float* coeffs )
85 const float A = -0.75f;
87 coeffs[0] = ((A*(x + 1) - 5*A)*(x + 1) + 8*A)*(x + 1) - 4*A;
88 coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
89 coeffs[2] = ((A + 2)*(1 - x) - (A + 3))*(1 - x)*(1 - x) + 1;
90 coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
93 static inline void interpolateLanczos4( float x, float* coeffs )
95 static const double s45 = 0.70710678118654752440084436210485;
96 static const double cs[][2]=
97 {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
100 if( x < FLT_EPSILON )
102 for( int i = 0; i < 8; i++ )
109 double y0=-(x+3)*CV_PI*0.25, s0 = sin(y0), c0=cos(y0);
110 for( i = 0; i < 8; i++ )
112 double y = -(x+3-i)*CV_PI*0.25;
113 coeffs[i] = (float)((cs[i][0]*s0 + cs[i][1]*c0)/(y*y));
118 for( i = 0; i < 8; i++ )
122 static void initInterTab1D(int method, float* tab, int tabsz)
124 float scale = 1.f/tabsz;
125 if( method == INTER_LINEAR )
127 for( int i = 0; i < tabsz; i++, tab += 2 )
128 interpolateLinear( i*scale, tab );
130 else if( method == INTER_CUBIC )
132 for( int i = 0; i < tabsz; i++, tab += 4 )
133 interpolateCubic( i*scale, tab );
135 else if( method == INTER_LANCZOS4 )
137 for( int i = 0; i < tabsz; i++, tab += 8 )
138 interpolateLanczos4( i*scale, tab );
141 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
145 static const void* initInterTab2D( int method, bool fixpt )
147 static bool inittab[INTER_MAX+1] = {false};
151 if( method == INTER_LINEAR )
152 tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
153 else if( method == INTER_CUBIC )
154 tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
155 else if( method == INTER_LANCZOS4 )
156 tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
158 CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );
160 if( !inittab[method] )
162 AutoBuffer<float> _tab(INTER_TAB_SIZE);
164 initInterTab1D(method, _tab, INTER_TAB_SIZE);
165 for( i = 0; i < INTER_TAB_SIZE; i++ )
166 for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
169 NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
170 NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;
172 for( k1 = 0; k1 < ksize; k1++ )
174 float vy = _tab[i*ksize + k1];
175 for( k2 = 0; k2 < ksize; k2++ )
177 float v = vy*_tab[j*ksize + k2];
178 tab[k1*ksize + k2] = v;
179 isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
183 if( isum != INTER_REMAP_COEF_SCALE )
185 int diff = isum - INTER_REMAP_COEF_SCALE;
186 int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
187 for( k1 = ksize2; k1 < ksize2+2; k1++ )
188 for( k2 = ksize2; k2 < ksize2+2; k2++ )
190 if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
192 else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
196 itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
198 itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
201 tab -= INTER_TAB_SIZE2*ksize*ksize;
202 itab -= INTER_TAB_SIZE2*ksize*ksize;
204 if( method == INTER_LINEAR )
206 for( i = 0; i < INTER_TAB_SIZE2; i++ )
207 for( j = 0; j < 4; j++ )
209 BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
210 BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
211 BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
212 BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
216 inittab[method] = true;
218 return fixpt ? (const void*)itab : (const void*)tab;
222 template<typename ST, typename DT> struct Cast
227 DT operator()(ST val) const { return saturate_cast<DT>(val); }
230 template<typename ST, typename DT, int bits> struct FixedPtCast
234 enum { SHIFT = bits, DELTA = 1 << (bits-1) };
236 DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
239 /****************************************************************************************\
241 \****************************************************************************************/
244 resizeNN( const Mat& src, Mat& dst, double fx, double fy )
246 Size ssize = src.size(), dsize = dst.size();
247 AutoBuffer<int> _x_ofs(dsize.width);
249 int pix_size = (int)src.elemSize();
250 int pix_size4 = (int)(pix_size / sizeof(int));
251 double ifx = 1./fx, ify = 1./fy;
254 for( x = 0; x < dsize.width; x++ )
256 int sx = saturate_cast<int>(x*ifx);
257 x_ofs[x] = std::min(sx, ssize.width-1)*pix_size;
260 for( y = 0; y < dsize.height; y++ )
262 uchar* D = dst.data + dst.step*y;
263 int sy = std::min(saturate_cast<int>(y*ify), ssize.height-1);
264 const uchar* S = src.data + src.step*sy;
269 for( x = 0; x <= dsize.width - 2; x += 2 )
271 uchar t0 = S[x_ofs[x]];
272 uchar t1 = S[x_ofs[x+1]];
277 for( ; x < dsize.width; x++ )
281 for( x = 0; x < dsize.width; x++ )
282 *(ushort*)(D + x*2) = *(ushort*)(S + x_ofs[x]);
285 for( x = 0; x < dsize.width; x++, D += 3 )
287 const uchar* _tS = S + x_ofs[x];
288 D[0] = _tS[0]; D[1] = _tS[1]; D[2] = _tS[2];
292 for( x = 0; x < dsize.width; x++ )
293 *(int*)(D + x*4) = *(int*)(S + x_ofs[x]);
296 for( x = 0; x < dsize.width; x++, D += 6 )
298 const ushort* _tS = (const ushort*)(S + x_ofs[x]);
299 ushort* _tD = (ushort*)D;
300 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
304 for( x = 0; x < dsize.width; x++, D += 8 )
306 const int* _tS = (const int*)(S + x_ofs[x]);
308 _tD[0] = _tS[0]; _tD[1] = _tS[1];
312 for( x = 0; x < dsize.width; x++, D += 12 )
314 const int* _tS = (const int*)(S + x_ofs[x]);
316 _tD[0] = _tS[0]; _tD[1] = _tS[1]; _tD[2] = _tS[2];
320 for( x = 0; x < dsize.width; x++, D += pix_size )
322 const int* _tS = (const int*)(S + x_ofs[x]);
324 for( int k = 0; k < pix_size4; k++ )
334 int operator()(const uchar**, uchar*, const uchar*, int ) const { return 0; }
339 int operator()(const uchar**, uchar**, int, const int*,
340 const uchar*, int, int, int, int, int) const { return 0; }
345 struct VResizeLinearVec_32s8u
347 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
349 if( !checkHardwareSupport(CV_CPU_SSE2) )
352 const int** src = (const int**)_src;
353 const short* beta = (const short*)_beta;
354 const int *S0 = src[0], *S1 = src[1];
356 __m128i b0 = _mm_set1_epi16(beta[0]), b1 = _mm_set1_epi16(beta[1]);
357 __m128i delta = _mm_set1_epi16(2);
359 if( (((size_t)S0|(size_t)S1)&15) == 0 )
360 for( ; x <= width - 16; x += 16 )
362 __m128i x0, x1, x2, y0, y1, y2;
363 x0 = _mm_load_si128((const __m128i*)(S0 + x));
364 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
365 y0 = _mm_load_si128((const __m128i*)(S1 + x));
366 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
367 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
368 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
370 x1 = _mm_load_si128((const __m128i*)(S0 + x + 8));
371 x2 = _mm_load_si128((const __m128i*)(S0 + x + 12));
372 y1 = _mm_load_si128((const __m128i*)(S1 + x + 8));
373 y2 = _mm_load_si128((const __m128i*)(S1 + x + 12));
374 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
375 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
377 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
378 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
380 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
381 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
382 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
385 for( ; x <= width - 16; x += 16 )
387 __m128i x0, x1, x2, y0, y1, y2;
388 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
389 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
390 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
391 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
392 x0 = _mm_packs_epi32(_mm_srai_epi32(x0, 4), _mm_srai_epi32(x1, 4));
393 y0 = _mm_packs_epi32(_mm_srai_epi32(y0, 4), _mm_srai_epi32(y1, 4));
395 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 8));
396 x2 = _mm_loadu_si128((const __m128i*)(S0 + x + 12));
397 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 8));
398 y2 = _mm_loadu_si128((const __m128i*)(S1 + x + 12));
399 x1 = _mm_packs_epi32(_mm_srai_epi32(x1, 4), _mm_srai_epi32(x2, 4));
400 y1 = _mm_packs_epi32(_mm_srai_epi32(y1, 4), _mm_srai_epi32(y2, 4));
402 x0 = _mm_adds_epi16(_mm_mulhi_epi16( x0, b0 ), _mm_mulhi_epi16( y0, b1 ));
403 x1 = _mm_adds_epi16(_mm_mulhi_epi16( x1, b0 ), _mm_mulhi_epi16( y1, b1 ));
405 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
406 x1 = _mm_srai_epi16(_mm_adds_epi16(x1, delta), 2);
407 _mm_storeu_si128( (__m128i*)(dst + x), _mm_packus_epi16(x0, x1));
410 for( ; x < width - 4; x += 4 )
413 x0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S0 + x)), 4);
414 y0 = _mm_srai_epi32(_mm_loadu_si128((const __m128i*)(S1 + x)), 4);
415 x0 = _mm_packs_epi32(x0, x0);
416 y0 = _mm_packs_epi32(y0, y0);
417 x0 = _mm_adds_epi16(_mm_mulhi_epi16(x0, b0), _mm_mulhi_epi16(y0, b1));
418 x0 = _mm_srai_epi16(_mm_adds_epi16(x0, delta), 2);
419 x0 = _mm_packus_epi16(x0, x0);
420 *(int*)(dst + x) = _mm_cvtsi128_si32(x0);
428 struct VResizeLinearVec_32f16u
430 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
432 if( !checkHardwareSupport(CV_CPU_SSE2) )
435 const float** src = (const float**)_src;
436 const float* beta = (const float*)_beta;
437 const float *S0 = src[0], *S1 = src[1];
438 ushort* dst = (ushort*)_dst;
441 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
442 __m128i preshift = _mm_set1_epi32(SHRT_MIN);
443 __m128i postshift = _mm_set1_epi16(SHRT_MIN);
445 if( (((size_t)S0|(size_t)S1)&15) == 0 )
446 for( ; x <= width - 16; x += 16 )
448 __m128 x0, x1, y0, y1;
450 x0 = _mm_load_ps(S0 + x);
451 x1 = _mm_load_ps(S0 + x + 4);
452 y0 = _mm_load_ps(S1 + x);
453 y1 = _mm_load_ps(S1 + x + 4);
455 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
456 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
457 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
458 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
459 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
461 x0 = _mm_load_ps(S0 + x + 8);
462 x1 = _mm_load_ps(S0 + x + 12);
463 y0 = _mm_load_ps(S1 + x + 8);
464 y1 = _mm_load_ps(S1 + x + 12);
466 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
467 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
468 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
469 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
470 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
472 _mm_storeu_si128( (__m128i*)(dst + x), t0);
473 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
476 for( ; x <= width - 16; x += 16 )
478 __m128 x0, x1, y0, y1;
480 x0 = _mm_loadu_ps(S0 + x);
481 x1 = _mm_loadu_ps(S0 + x + 4);
482 y0 = _mm_loadu_ps(S1 + x);
483 y1 = _mm_loadu_ps(S1 + x + 4);
485 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
486 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
487 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
488 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
489 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t2), postshift);
491 x0 = _mm_loadu_ps(S0 + x + 8);
492 x1 = _mm_loadu_ps(S0 + x + 12);
493 y0 = _mm_loadu_ps(S1 + x + 8);
494 y1 = _mm_loadu_ps(S1 + x + 12);
496 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
497 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
498 t1 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
499 t2 = _mm_add_epi32(_mm_cvtps_epi32(x1), preshift);
500 t1 = _mm_add_epi16(_mm_packs_epi32(t1, t2), postshift);
502 _mm_storeu_si128( (__m128i*)(dst + x), t0);
503 _mm_storeu_si128( (__m128i*)(dst + x + 8), t1);
506 for( ; x < width - 4; x += 4 )
510 x0 = _mm_loadu_ps(S0 + x);
511 y0 = _mm_loadu_ps(S1 + x);
513 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
514 t0 = _mm_add_epi32(_mm_cvtps_epi32(x0), preshift);
515 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t0), postshift);
516 _mm_storel_epi64( (__m128i*)(dst + x), t0);
524 struct VResizeLinearVec_32f
526 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
528 if( !checkHardwareSupport(CV_CPU_SSE) )
531 const float** src = (const float**)_src;
532 const float* beta = (const float*)_beta;
533 const float *S0 = src[0], *S1 = src[1];
534 float* dst = (float*)_dst;
537 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]);
539 if( (((size_t)S0|(size_t)S1)&15) == 0 )
540 for( ; x <= width - 8; x += 8 )
542 __m128 x0, x1, y0, y1;
543 x0 = _mm_load_ps(S0 + x);
544 x1 = _mm_load_ps(S0 + x + 4);
545 y0 = _mm_load_ps(S1 + x);
546 y1 = _mm_load_ps(S1 + x + 4);
548 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
549 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
551 _mm_storeu_ps( dst + x, x0);
552 _mm_storeu_ps( dst + x + 4, x1);
555 for( ; x <= width - 8; x += 8 )
557 __m128 x0, x1, y0, y1;
558 x0 = _mm_loadu_ps(S0 + x);
559 x1 = _mm_loadu_ps(S0 + x + 4);
560 y0 = _mm_loadu_ps(S1 + x);
561 y1 = _mm_loadu_ps(S1 + x + 4);
563 x0 = _mm_add_ps(_mm_mul_ps(x0, b0), _mm_mul_ps(y0, b1));
564 x1 = _mm_add_ps(_mm_mul_ps(x1, b0), _mm_mul_ps(y1, b1));
566 _mm_storeu_ps( dst + x, x0);
567 _mm_storeu_ps( dst + x + 4, x1);
575 struct VResizeCubicVec_32s8u
577 int operator()(const uchar** _src, uchar* dst, const uchar* _beta, int width ) const
579 if( !checkHardwareSupport(CV_CPU_SSE2) )
582 const int** src = (const int**)_src;
583 const short* beta = (const short*)_beta;
584 const int *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
586 float scale = 1.f/(INTER_RESIZE_COEF_SCALE*INTER_RESIZE_COEF_SCALE);
587 __m128 b0 = _mm_set1_ps(beta[0]*scale), b1 = _mm_set1_ps(beta[1]*scale),
588 b2 = _mm_set1_ps(beta[2]*scale), b3 = _mm_set1_ps(beta[3]*scale);
590 if( (((size_t)S0|(size_t)S1|(size_t)S2|(size_t)S3)&15) == 0 )
591 for( ; x <= width - 8; x += 8 )
593 __m128i x0, x1, y0, y1;
594 __m128 s0, s1, f0, f1;
595 x0 = _mm_load_si128((const __m128i*)(S0 + x));
596 x1 = _mm_load_si128((const __m128i*)(S0 + x + 4));
597 y0 = _mm_load_si128((const __m128i*)(S1 + x));
598 y1 = _mm_load_si128((const __m128i*)(S1 + x + 4));
600 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
601 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
602 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
603 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
604 s0 = _mm_add_ps(s0, f0);
605 s1 = _mm_add_ps(s1, f1);
607 x0 = _mm_load_si128((const __m128i*)(S2 + x));
608 x1 = _mm_load_si128((const __m128i*)(S2 + x + 4));
609 y0 = _mm_load_si128((const __m128i*)(S3 + x));
610 y1 = _mm_load_si128((const __m128i*)(S3 + x + 4));
612 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
613 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
614 s0 = _mm_add_ps(s0, f0);
615 s1 = _mm_add_ps(s1, f1);
616 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
617 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
618 s0 = _mm_add_ps(s0, f0);
619 s1 = _mm_add_ps(s1, f1);
621 x0 = _mm_cvtps_epi32(s0);
622 x1 = _mm_cvtps_epi32(s1);
624 x0 = _mm_packs_epi32(x0, x1);
625 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
628 for( ; x <= width - 8; x += 8 )
630 __m128i x0, x1, y0, y1;
631 __m128 s0, s1, f0, f1;
632 x0 = _mm_loadu_si128((const __m128i*)(S0 + x));
633 x1 = _mm_loadu_si128((const __m128i*)(S0 + x + 4));
634 y0 = _mm_loadu_si128((const __m128i*)(S1 + x));
635 y1 = _mm_loadu_si128((const __m128i*)(S1 + x + 4));
637 s0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b0);
638 s1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b0);
639 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b1);
640 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b1);
641 s0 = _mm_add_ps(s0, f0);
642 s1 = _mm_add_ps(s1, f1);
644 x0 = _mm_loadu_si128((const __m128i*)(S2 + x));
645 x1 = _mm_loadu_si128((const __m128i*)(S2 + x + 4));
646 y0 = _mm_loadu_si128((const __m128i*)(S3 + x));
647 y1 = _mm_loadu_si128((const __m128i*)(S3 + x + 4));
649 f0 = _mm_mul_ps(_mm_cvtepi32_ps(x0), b2);
650 f1 = _mm_mul_ps(_mm_cvtepi32_ps(x1), b2);
651 s0 = _mm_add_ps(s0, f0);
652 s1 = _mm_add_ps(s1, f1);
653 f0 = _mm_mul_ps(_mm_cvtepi32_ps(y0), b3);
654 f1 = _mm_mul_ps(_mm_cvtepi32_ps(y1), b3);
655 s0 = _mm_add_ps(s0, f0);
656 s1 = _mm_add_ps(s1, f1);
658 x0 = _mm_cvtps_epi32(s0);
659 x1 = _mm_cvtps_epi32(s1);
661 x0 = _mm_packs_epi32(x0, x1);
662 _mm_storel_epi64( (__m128i*)(dst + x), _mm_packus_epi16(x0, x0));
670 struct VResizeCubicVec_32f16u
672 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
674 if( !checkHardwareSupport(CV_CPU_SSE2) )
677 const float** src = (const float**)_src;
678 const float* beta = (const float*)_beta;
679 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
680 ushort* dst = (ushort*)_dst;
682 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
683 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
684 __m128i preshift = _mm_set1_epi32(SHRT_MIN);
685 __m128i postshift = _mm_set1_epi16(SHRT_MIN);
687 for( ; x <= width - 8; x += 8 )
689 __m128 x0, x1, y0, y1, s0, s1;
691 x0 = _mm_loadu_ps(S0 + x);
692 x1 = _mm_loadu_ps(S0 + x + 4);
693 y0 = _mm_loadu_ps(S1 + x);
694 y1 = _mm_loadu_ps(S1 + x + 4);
696 s0 = _mm_mul_ps(x0, b0);
697 s1 = _mm_mul_ps(x1, b0);
698 y0 = _mm_mul_ps(y0, b1);
699 y1 = _mm_mul_ps(y1, b1);
700 s0 = _mm_add_ps(s0, y0);
701 s1 = _mm_add_ps(s1, y1);
703 x0 = _mm_loadu_ps(S2 + x);
704 x1 = _mm_loadu_ps(S2 + x + 4);
705 y0 = _mm_loadu_ps(S3 + x);
706 y1 = _mm_loadu_ps(S3 + x + 4);
708 x0 = _mm_mul_ps(x0, b2);
709 x1 = _mm_mul_ps(x1, b2);
710 y0 = _mm_mul_ps(y0, b3);
711 y1 = _mm_mul_ps(y1, b3);
712 s0 = _mm_add_ps(s0, x0);
713 s1 = _mm_add_ps(s1, x1);
714 s0 = _mm_add_ps(s0, y0);
715 s1 = _mm_add_ps(s1, y1);
717 t0 = _mm_add_epi32(_mm_cvtps_epi32(s0), preshift);
718 t1 = _mm_add_epi32(_mm_cvtps_epi32(s1), preshift);
720 t0 = _mm_add_epi16(_mm_packs_epi32(t0, t1), postshift);
721 _mm_storeu_si128( (__m128i*)(dst + x), t0);
729 struct VResizeCubicVec_32f
731 int operator()(const uchar** _src, uchar* _dst, const uchar* _beta, int width ) const
733 if( !checkHardwareSupport(CV_CPU_SSE) )
736 const float** src = (const float**)_src;
737 const float* beta = (const float*)_beta;
738 const float *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
739 float* dst = (float*)_dst;
741 __m128 b0 = _mm_set1_ps(beta[0]), b1 = _mm_set1_ps(beta[1]),
742 b2 = _mm_set1_ps(beta[2]), b3 = _mm_set1_ps(beta[3]);
744 for( ; x <= width - 8; x += 8 )
746 __m128 x0, x1, y0, y1, s0, s1;
747 x0 = _mm_loadu_ps(S0 + x);
748 x1 = _mm_loadu_ps(S0 + x + 4);
749 y0 = _mm_loadu_ps(S1 + x);
750 y1 = _mm_loadu_ps(S1 + x + 4);
752 s0 = _mm_mul_ps(x0, b0);
753 s1 = _mm_mul_ps(x1, b0);
754 y0 = _mm_mul_ps(y0, b1);
755 y1 = _mm_mul_ps(y1, b1);
756 s0 = _mm_add_ps(s0, y0);
757 s1 = _mm_add_ps(s1, y1);
759 x0 = _mm_loadu_ps(S2 + x);
760 x1 = _mm_loadu_ps(S2 + x + 4);
761 y0 = _mm_loadu_ps(S3 + x);
762 y1 = _mm_loadu_ps(S3 + x + 4);
764 x0 = _mm_mul_ps(x0, b2);
765 x1 = _mm_mul_ps(x1, b2);
766 y0 = _mm_mul_ps(y0, b3);
767 y1 = _mm_mul_ps(y1, b3);
768 s0 = _mm_add_ps(s0, x0);
769 s1 = _mm_add_ps(s1, x1);
770 s0 = _mm_add_ps(s0, y0);
771 s1 = _mm_add_ps(s1, y1);
773 _mm_storeu_ps( dst + x, s0);
774 _mm_storeu_ps( dst + x + 4, s1);
781 typedef HResizeNoVec HResizeLinearVec_8u32s;
782 typedef HResizeNoVec HResizeLinearVec_16u32f;
783 typedef HResizeNoVec HResizeLinearVec_32f;
787 typedef HResizeNoVec HResizeLinearVec_8u32s;
788 typedef HResizeNoVec HResizeLinearVec_16u32f;
789 typedef HResizeNoVec HResizeLinearVec_32f;
791 typedef VResizeNoVec VResizeLinearVec_32s8u;
792 typedef VResizeNoVec VResizeLinearVec_32f16u;
793 typedef VResizeNoVec VResizeLinearVec_32f;
795 typedef VResizeNoVec VResizeCubicVec_32s8u;
796 typedef VResizeNoVec VResizeCubicVec_32f16u;
797 typedef VResizeNoVec VResizeCubicVec_32f;
802 template<typename T, typename WT, typename AT, int ONE, class VecOp>
805 typedef T value_type;
807 typedef AT alpha_type;
809 void operator()(const T** src, WT** dst, int count,
810 const int* xofs, const AT* alpha,
811 int swidth, int dwidth, int cn, int xmin, int xmax ) const
816 int dx0 = vecOp((const uchar**)src, (uchar**)dst, count,
817 xofs, (const uchar*)alpha, swidth, dwidth, cn, xmin, xmax );
819 for( k = 0; k <= count - 2; k++ )
821 const T *S0 = src[k], *S1 = src[k+1];
822 WT *D0 = dst[k], *D1 = dst[k+1];
823 for( dx = dx0; dx < xmax; dx++ )
826 WT a0 = alpha[dx*2], a1 = alpha[dx*2+1];
827 WT t0 = S0[sx]*a0 + S0[sx + cn]*a1;
828 WT t1 = S1[sx]*a0 + S1[sx + cn]*a1;
829 D0[dx] = t0; D1[dx] = t1;
832 for( ; dx < dwidth; dx++ )
835 D0[dx] = WT(S0[sx]*ONE); D1[dx] = WT(S1[sx]*ONE);
839 for( ; k < count; k++ )
843 for( dx = 0; dx < xmax; dx++ )
846 D[dx] = S[sx]*alpha[dx*2] + S[sx+cn]*alpha[dx*2+1];
849 for( ; dx < dwidth; dx++ )
850 D[dx] = WT(S[xofs[dx]]*ONE);
856 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
859 typedef T value_type;
861 typedef AT alpha_type;
863 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
865 WT b0 = beta[0], b1 = beta[1];
866 const WT *S0 = src[0], *S1 = src[1];
870 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
871 for( ; x <= width - 4; x += 4 )
874 t0 = S0[x]*b0 + S1[x]*b1;
875 t1 = S0[x+1]*b0 + S1[x+1]*b1;
876 dst[x] = castOp(t0); dst[x+1] = castOp(t1);
877 t0 = S0[x+2]*b0 + S1[x+2]*b1;
878 t1 = S0[x+3]*b0 + S1[x+3]*b1;
879 dst[x+2] = castOp(t0); dst[x+3] = castOp(t1);
882 for( ; x < width; x++ )
883 dst[x] = castOp(S0[x]*b0 + S1[x]*b1);
888 template<typename T, typename WT, typename AT>
891 typedef T value_type;
893 typedef AT alpha_type;
895 void operator()(const T** src, WT** dst, int count,
896 const int* xofs, const AT* alpha,
897 int swidth, int dwidth, int cn, int xmin, int xmax ) const
899 for( int k = 0; k < count; k++ )
903 int dx = 0, limit = xmin;
906 for( ; dx < limit; dx++, alpha += 4 )
908 int j, sx = xofs[dx] - cn;
910 for( j = 0; j < 4; j++ )
913 if( (unsigned)sxj >= (unsigned)swidth )
917 while( sxj >= swidth )
920 v += S[sxj]*alpha[j];
924 if( limit == dwidth )
926 for( ; dx < xmax; dx++, alpha += 4 )
929 D[dx] = S[sx-cn]*alpha[0] + S[sx]*alpha[1] +
930 S[sx+cn]*alpha[2] + S[sx+cn*2]*alpha[3];
940 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
943 typedef T value_type;
945 typedef AT alpha_type;
947 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
949 WT b0 = beta[0], b1 = beta[1], b2 = beta[2], b3 = beta[3];
950 const WT *S0 = src[0], *S1 = src[1], *S2 = src[2], *S3 = src[3];
954 int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
955 for( ; x < width; x++ )
956 dst[x] = castOp(S0[x]*b0 + S1[x]*b1 + S2[x]*b2 + S3[x]*b3);
961 template<typename T, typename WT, typename AT>
962 struct HResizeLanczos4
964 typedef T value_type;
966 typedef AT alpha_type;
968 void operator()(const T** src, WT** dst, int count,
969 const int* xofs, const AT* alpha,
970 int swidth, int dwidth, int cn, int xmin, int xmax ) const
972 for( int k = 0; k < count; k++ )
976 int dx = 0, limit = xmin;
979 for( ; dx < limit; dx++, alpha += 8 )
981 int j, sx = xofs[dx] - cn*3;
983 for( j = 0; j < 8; j++ )
986 if( (unsigned)sxj >= (unsigned)swidth )
990 while( sxj >= swidth )
993 v += S[sxj]*alpha[j];
997 if( limit == dwidth )
999 for( ; dx < xmax; dx++, alpha += 8 )
1002 D[dx] = S[sx-cn*3]*alpha[0] + S[sx-cn*2]*alpha[1] +
1003 S[sx-cn]*alpha[2] + S[sx]*alpha[3] +
1004 S[sx+cn]*alpha[4] + S[sx+cn*2]*alpha[5] +
1005 S[sx+cn*3]*alpha[6] + S[sx+cn*4]*alpha[7];
1015 template<typename T, typename WT, typename AT, class CastOp, class VecOp>
1016 struct VResizeLanczos4
1018 typedef T value_type;
1019 typedef WT buf_type;
1020 typedef AT alpha_type;
1022 void operator()(const WT** src, T* dst, const AT* beta, int width ) const
1026 int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width);
1028 for( ; x <= width - 4; x += 4 )
1031 const WT* S = src[0];
1032 WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b;
1034 for( k = 1; k < 8; k++ )
1036 b = beta[k]; S = src[k];
1037 s0 += S[x]*b; s1 += S[x+1]*b;
1038 s2 += S[x+2]*b; s3 += S[x+3]*b;
1041 dst[x] = castOp(s0); dst[x+1] = castOp(s1);
1042 dst[x+2] = castOp(s2); dst[x+3] = castOp(s3);
1045 for( ; x < width; x++ )
1047 dst[x] = castOp(src[0][x]*beta[0] + src[1][x]*beta[1] +
1048 src[2][x]*beta[2] + src[3][x]*beta[3] + src[4][x]*beta[4] +
1049 src[5][x]*beta[5] + src[6][x]*beta[6] + src[7][x]*beta[7]);
1055 static inline int clip(int x, int a, int b)
1057 return x >= a ? (x < b ? x : b-1) : a;
1060 static const int MAX_ESIZE=16;
1062 template<class HResize, class VResize>
1063 static void resizeGeneric_( const Mat& src, Mat& dst,
1064 const int* xofs, const void* _alpha,
1065 const int* yofs, const void* _beta,
1066 int xmin, int xmax, int ksize )
1068 typedef typename HResize::value_type T;
1069 typedef typename HResize::buf_type WT;
1070 typedef typename HResize::alpha_type AT;
1072 const AT* alpha = (const AT*)_alpha;
1073 const AT* beta = (const AT*)_beta;
1074 Size ssize = src.size(), dsize = dst.size();
1075 int cn = src.channels();
1078 int bufstep = (int)alignSize(dsize.width, 16);
1079 AutoBuffer<WT> _buffer(bufstep*ksize);
1080 const T* srows[MAX_ESIZE]={0};
1081 WT* rows[MAX_ESIZE]={0};
1082 int prev_sy[MAX_ESIZE];
1090 for( k = 0; k < ksize; k++ )
1093 rows[k] = (WT*)_buffer + bufstep*k;
1096 // image resize is a separable operation. In case of not too strong
1097 for( dy = 0; dy < dsize.height; dy++, beta += ksize )
1099 int sy0 = yofs[dy], k, k0=ksize, k1=0, ksize2 = ksize/2;
1101 for( k = 0; k < ksize; k++ )
1103 int sy = clip(sy0 - ksize2 + 1 + k, 0, ssize.height);
1104 for( k1 = std::max(k1, k); k1 < ksize; k1++ )
1106 if( sy == prev_sy[k1] ) // if the sy-th row has been computed already, reuse it.
1109 memcpy( rows[k], rows[k1], bufstep*sizeof(rows[0][0]) );
1114 k0 = std::min(k0, k); // remember the first row that needs to be computed
1115 srows[k] = (const T*)(src.data + src.step*sy);
1120 hresize( srows + k0, rows + k0, ksize - k0, xofs, alpha,
1121 ssize.width, dsize.width, cn, xmin, xmax );
1123 vresize( (const WT**)rows, (T*)(dst.data + dst.step*dy), beta, dsize.width );
1128 template<typename T, typename WT>
1129 static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int* xofs )
1131 Size ssize = src.size(), dsize = dst.size();
1132 int cn = src.channels();
1134 int scale_x = ssize.width/dsize.width;
1135 int scale_y = ssize.height/dsize.height;
1136 int area = scale_x*scale_y;
1137 float scale = 1.f/(scale_x*scale_y);
1140 for( dy = 0; dy < dsize.height; dy++ )
1142 T* D = (T*)(dst.data + dst.step*dy);
1143 for( dx = 0; dx < dsize.width; dx++ )
1145 const T* S = (const T*)(src.data + src.step*dy*scale_y) + xofs[dx];
1147 for( k = 0; k <= area - 4; k += 4 )
1148 sum += S[ofs[k]] + S[ofs[k+1]] + S[ofs[k+2]] + S[ofs[k+3]];
1149 for( ; k < area; k++ )
1152 D[dx] = saturate_cast<T>(sum*scale);
1157 struct DecimateAlpha
1163 template<typename T>
1164 static void resizeArea_( const Mat& src, Mat& dst, const DecimateAlpha* xofs, int xofs_count )
1166 Size ssize = src.size(), dsize = dst.size();
1167 int cn = src.channels();
1169 AutoBuffer<float> _buffer(dsize.width*2);
1170 float *buf = _buffer, *sum = buf + dsize.width;
1171 int k, sy, dx, cur_dy = 0;
1172 float scale_y = (float)ssize.height/dsize.height;
1174 CV_Assert( cn <= 4 );
1175 for( dx = 0; dx < dsize.width; dx++ )
1176 buf[dx] = sum[dx] = 0;
1178 for( sy = 0; sy < ssize.height; sy++ )
1180 const T* S = (const T*)(src.data + src.step*sy);
1182 for( k = 0; k < xofs_count; k++ )
1184 int dxn = xofs[k].di;
1185 float alpha = xofs[k].alpha;
1186 buf[dxn] += S[xofs[k].si]*alpha;
1189 for( k = 0; k < xofs_count; k++ )
1191 int sxn = xofs[k].si;
1192 int dxn = xofs[k].di;
1193 float alpha = xofs[k].alpha;
1194 float t0 = buf[dxn] + S[sxn]*alpha;
1195 float t1 = buf[dxn+1] + S[sxn+1]*alpha;
1196 buf[dxn] = t0; buf[dxn+1] = t1;
1199 for( k = 0; k < xofs_count; k++ )
1201 int sxn = xofs[k].si;
1202 int dxn = xofs[k].di;
1203 float alpha = xofs[k].alpha;
1204 float t0 = buf[dxn] + S[sxn]*alpha;
1205 float t1 = buf[dxn+1] + S[sxn+1]*alpha;
1206 float t2 = buf[dxn+2] + S[sxn+2]*alpha;
1207 buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;
1210 for( k = 0; k < xofs_count; k++ )
1212 int sxn = xofs[k].si;
1213 int dxn = xofs[k].di;
1214 float alpha = xofs[k].alpha;
1215 float t0 = buf[dxn] + S[sxn]*alpha;
1216 float t1 = buf[dxn+1] + S[sxn+1]*alpha;
1217 buf[dxn] = t0; buf[dxn+1] = t1;
1218 t0 = buf[dxn+2] + S[sxn+2]*alpha;
1219 t1 = buf[dxn+3] + S[sxn+3]*alpha;
1220 buf[dxn+2] = t0; buf[dxn+3] = t1;
1223 if( (cur_dy + 1)*scale_y <= sy + 1 || sy == ssize.height - 1 )
1225 float beta = std::max(sy + 1 - (cur_dy+1)*scale_y, 0.f);
1226 float beta1 = 1 - beta;
1227 T* D = (T*)(dst.data + dst.step*cur_dy);
1228 if( fabs(beta) < 1e-3 )
1229 for( dx = 0; dx < dsize.width; dx++ )
1231 D[dx] = saturate_cast<T>(sum[dx] + buf[dx]);
1232 sum[dx] = buf[dx] = 0;
1235 for( dx = 0; dx < dsize.width; dx++ )
1237 D[dx] = saturate_cast<T>(sum[dx] + buf[dx]*beta1);
1238 sum[dx] = buf[dx]*beta;
1245 for( dx = 0; dx <= dsize.width - 2; dx += 2 )
1247 float t0 = sum[dx] + buf[dx];
1248 float t1 = sum[dx+1] + buf[dx+1];
1249 sum[dx] = t0; sum[dx+1] = t1;
1250 buf[dx] = buf[dx+1] = 0;
1252 for( ; dx < dsize.width; dx++ )
1262 typedef void (*ResizeFunc)( const Mat& src, Mat& dst,
1263 const int* xofs, const void* alpha,
1264 const int* yofs, const void* beta,
1265 int xmin, int xmax, int ksize );
1267 typedef void (*ResizeAreaFastFunc)( const Mat& src, Mat& dst,
1268 const int* ofs, const int *xofs );
1270 typedef void (*ResizeAreaFunc)( const Mat& src, Mat& dst,
1271 const DecimateAlpha* xofs, int xofs_count );
1273 //////////////////////////////////////////////////////////////////////////////////////////
1275 void resize( const Mat& src, Mat& dst, Size dsize,
1276 double inv_scale_x, double inv_scale_y, int interpolation )
1278 static ResizeFunc linear_tab[] =
1281 HResizeLinear<uchar, int, short,
1282 INTER_RESIZE_COEF_SCALE,
1283 HResizeLinearVec_8u32s>,
1284 VResizeLinear<uchar, int, short,
1285 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1286 VResizeLinearVec_32s8u> >,
1289 HResizeLinear<ushort, float, float, 1,
1290 HResizeLinearVec_16u32f>,
1291 VResizeLinear<ushort, float, float, Cast<float, ushort>,
1292 VResizeLinearVec_32f16u> >,
1295 HResizeLinear<float, float, float, 1,
1296 HResizeLinearVec_32f>,
1297 VResizeLinear<float, float, float, Cast<float, float>,
1298 VResizeLinearVec_32f> >,
1302 static ResizeFunc cubic_tab[] =
1305 HResizeCubic<uchar, int, short>,
1306 VResizeCubic<uchar, int, short,
1307 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1308 VResizeCubicVec_32s8u> >,
1311 HResizeCubic<ushort, float, float>,
1312 VResizeCubic<ushort, float, float, Cast<float, ushort>,
1313 VResizeCubicVec_32f16u> >,
1316 HResizeCubic<float, float, float>,
1317 VResizeCubic<float, float, float, Cast<float, float>,
1318 VResizeCubicVec_32f> >,
1322 static ResizeFunc lanczos4_tab[] =
1324 resizeGeneric_<HResizeLanczos4<uchar, int, short>,
1325 VResizeLanczos4<uchar, int, short,
1326 FixedPtCast<int, uchar, INTER_RESIZE_COEF_BITS*2>,
1329 resizeGeneric_<HResizeLanczos4<ushort, float, float>,
1330 VResizeLanczos4<ushort, float, float, Cast<float, ushort>,
1333 resizeGeneric_<HResizeLanczos4<float, float, float>,
1334 VResizeLanczos4<float, float, float, Cast<float, float>,
1339 static ResizeAreaFastFunc areafast_tab[] =
1341 resizeAreaFast_<uchar, int>, 0, resizeAreaFast_<ushort, float>,
1342 0, 0, resizeAreaFast_<float, float>, 0, 0
1345 static ResizeAreaFunc area_tab[] =
1347 resizeArea_<uchar>, 0, resizeArea_<ushort>, 0, 0, resizeArea_<float>, 0, 0
1350 CV_Assert( !(dsize == Size()) || (inv_scale_x > 0 && inv_scale_y > 0) );
1351 if( dsize == Size() )
1353 dsize = Size(saturate_cast<int>(src.cols*inv_scale_x),
1354 saturate_cast<int>(src.rows*inv_scale_y));
1358 inv_scale_x = (double)dsize.width/src.cols;
1359 inv_scale_y = (double)dsize.height/src.rows;
1361 dst.create(dsize, src.type());
1363 Size ssize = src.size();
1364 int depth = src.depth(), cn = src.channels();
1365 double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
1366 int k, sx, sy, dx, dy;
1368 if( interpolation == INTER_NEAREST )
1370 resizeNN( src, dst, inv_scale_x, inv_scale_y );
1374 // true "area" interpolation is only implemented for the case (scale_x <= 1 && scale_y <= 1).
1375 // In other cases it is emulated using some variant of bilinear interpolation
1376 if( interpolation == INTER_AREA && scale_x >= 1 && scale_y >= 1 )
1378 int iscale_x = saturate_cast<int>(scale_x);
1379 int iscale_y = saturate_cast<int>(scale_y);
1381 if( std::abs(scale_x - iscale_x) < DBL_EPSILON &&
1382 std::abs(scale_y - iscale_y) < DBL_EPSILON )
1384 int area = iscale_x*iscale_y;
1385 size_t srcstep = src.step / src.elemSize1();
1386 AutoBuffer<int> _ofs(area + dsize.width*cn);
1388 int* xofs = ofs + area;
1389 ResizeAreaFastFunc func = areafast_tab[depth];
1390 CV_Assert( func != 0 );
1392 for( sy = 0, k = 0; sy < iscale_y; sy++ )
1393 for( sx = 0; sx < iscale_x; sx++ )
1394 ofs[k++] = (int)(sy*srcstep + sx*cn);
1396 for( dx = 0; dx < dsize.width; dx++ )
1398 sx = dx*iscale_x*cn;
1399 for( k = 0; k < cn; k++ )
1400 xofs[dx*cn + k] = sx + k;
1403 func( src, dst, ofs, xofs );
1407 ResizeAreaFunc func = area_tab[depth];
1408 CV_Assert( func != 0 && cn <= 4 );
1410 AutoBuffer<DecimateAlpha> _xofs(ssize.width*2);
1411 DecimateAlpha* xofs = _xofs;
1412 double scale = 1.f/(scale_x*scale_y);
1414 for( dx = 0, k = 0; dx < dsize.width; dx++ )
1416 double fsx1 = dx*scale_x, fsx2 = fsx1 + scale_x;
1417 int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2);
1418 sx1 = std::min(sx1, ssize.width-1);
1419 sx2 = std::min(sx2, ssize.width-1);
1423 assert( k < ssize.width*2 );
1425 xofs[k].si = (sx1-1)*cn;
1426 xofs[k++].alpha = (float)((sx1 - fsx1)*scale);
1429 for( sx = sx1; sx < sx2; sx++ )
1431 assert( k < ssize.width*2 );
1434 xofs[k++].alpha = (float)scale;
1437 if( fsx2 - sx2 > 1e-3 )
1439 assert( k < ssize.width*2 );
1441 xofs[k].si = sx2*cn;
1442 xofs[k++].alpha = (float)((fsx2 - sx2)*scale);
1446 func( src, dst, xofs, k );
1450 int xmin = 0, xmax = dsize.width, width = dsize.width*cn;
1451 bool area_mode = interpolation == INTER_AREA;
1452 bool fixpt = depth == CV_8U;
1455 int ksize=0, ksize2;
1456 if( interpolation == INTER_CUBIC )
1457 ksize = 4, func = cubic_tab[depth];
1458 else if( interpolation == INTER_LANCZOS4 )
1459 ksize = 8, func = lanczos4_tab[depth];
1460 else if( interpolation == INTER_LINEAR || interpolation == INTER_AREA )
1461 ksize = 2, func = linear_tab[depth];
1463 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
1466 CV_Assert( func != 0 );
1468 AutoBuffer<uchar> _buffer((width + dsize.height)*(sizeof(int) + sizeof(float)*ksize));
1469 int* xofs = (int*)(uchar*)_buffer;
1470 int* yofs = xofs + width;
1471 float* alpha = (float*)(yofs + dsize.height);
1472 short* ialpha = (short*)alpha;
1473 float* beta = alpha + width*ksize;
1474 short* ibeta = ialpha + width*ksize;
1475 float cbuf[MAX_ESIZE];
1477 for( dx = 0; dx < dsize.width; dx++ )
1481 fx = (float)((dx+0.5)*scale_x - 0.5);
1487 sx = cvFloor(dx*scale_x);
1488 fx = (float)((dx+1) - (sx+1)*inv_scale_x);
1489 fx = fx <= 0 ? 0.f : fx - cvFloor(fx);
1499 if( sx + ksize2 >= ssize.width )
1501 xmax = std::min( xmax, dx );
1502 if( sx >= ssize.width-1 )
1503 fx = 0, sx = ssize.width-1;
1506 for( k = 0, sx *= cn; k < cn; k++ )
1507 xofs[dx*cn + k] = sx + k;
1509 if( interpolation == INTER_CUBIC )
1510 interpolateCubic( fx, cbuf );
1511 else if( interpolation == INTER_LANCZOS4 )
1512 interpolateLanczos4( fx, cbuf );
1520 for( k = 0; k < ksize; k++ )
1521 ialpha[dx*cn*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
1522 for( ; k < cn*ksize; k++ )
1523 ialpha[dx*cn*ksize + k] = ialpha[dx*cn*ksize + k - ksize];
1527 for( k = 0; k < ksize; k++ )
1528 alpha[dx*cn*ksize + k] = cbuf[k];
1529 for( ; k < cn*ksize; k++ )
1530 alpha[dx*cn*ksize + k] = alpha[dx*cn*ksize + k - ksize];
1534 for( dy = 0; dy < dsize.height; dy++ )
1538 fy = (float)((dy+0.5)*scale_y - 0.5);
1544 sy = cvFloor(dy*scale_y);
1545 fy = (float)((dy+1) - (sy+1)*inv_scale_y);
1546 fy = fy <= 0 ? 0.f : fy - cvFloor(fy);
1550 if( interpolation == INTER_CUBIC )
1551 interpolateCubic( fy, cbuf );
1552 else if( interpolation == INTER_LANCZOS4 )
1553 interpolateLanczos4( fy, cbuf );
1562 for( k = 0; k < ksize; k++ )
1563 ibeta[dy*ksize + k] = saturate_cast<short>(cbuf[k]*INTER_RESIZE_COEF_SCALE);
1567 for( k = 0; k < ksize; k++ )
1568 beta[dy*ksize + k] = cbuf[k];
1572 func( src, dst, xofs, fixpt ? (void*)ialpha : (void*)alpha, yofs,
1573 fixpt ? (void*)ibeta : (void*)beta, xmin, xmax, ksize );
1577 /****************************************************************************************\
1578 * General warping (affine, perspective, remap) *
1579 \****************************************************************************************/
1581 template<typename T>
1582 static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
1583 int borderType, const Scalar& _borderValue )
1585 Size ssize = _src.size(), dsize = _dst.size();
1586 int cn = _src.channels();
1587 const T* S0 = (const T*)_src.data;
1588 size_t sstep = _src.step/sizeof(S0[0]);
1589 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
1590 saturate_cast<T>(_borderValue[1]),
1591 saturate_cast<T>(_borderValue[2]),
1592 saturate_cast<T>(_borderValue[3]));
1595 unsigned width1 = ssize.width, height1 = ssize.height;
1597 if( _dst.isContinuous() && _xy.isContinuous() )
1599 dsize.width *= dsize.height;
1603 for( dy = 0; dy < dsize.height; dy++ )
1605 T* D = (T*)(_dst.data + _dst.step*dy);
1606 const short* XY = (const short*)(_xy.data + _xy.step*dy);
1610 for( dx = 0; dx < dsize.width; dx++ )
1612 int sx = XY[dx*2], sy = XY[dx*2+1];
1613 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1614 D[dx] = S0[sy*sstep + sx];
1617 if( borderType == BORDER_REPLICATE )
1619 sx = clip(sx, 0, ssize.width);
1620 sy = clip(sy, 0, ssize.height);
1621 D[dx] = S0[sy*sstep + sx];
1623 else if( borderType == BORDER_CONSTANT )
1625 else if( borderType != BORDER_TRANSPARENT )
1627 sx = borderInterpolate(sx, ssize.width, borderType);
1628 sy = borderInterpolate(sy, ssize.height, borderType);
1629 D[dx] = S0[sy*sstep + sx];
1636 for( dx = 0; dx < dsize.width; dx++, D += cn )
1638 int sx = XY[dx*2], sy = XY[dx*2+1], k;
1640 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1644 S = S0 + sy*sstep + sx*3;
1645 D[0] = S[0], D[1] = S[1], D[2] = S[2];
1649 S = S0 + sy*sstep + sx*4;
1650 D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
1654 S = S0 + sy*sstep + sx*cn;
1655 for( k = 0; k < cn; k++ )
1659 else if( borderType != BORDER_TRANSPARENT )
1661 if( borderType == BORDER_REPLICATE )
1663 sx = clip(sx, 0, ssize.width);
1664 sy = clip(sy, 0, ssize.height);
1665 S = S0 + sy*sstep + sx*cn;
1667 else if( borderType == BORDER_CONSTANT )
1671 sx = borderInterpolate(sx, ssize.width, borderType);
1672 sy = borderInterpolate(sy, ssize.height, borderType);
1673 S = S0 + sy*sstep + sx*cn;
1675 for( k = 0; k < cn; k++ )
1686 int operator()( const Mat&, void*, const short*, const ushort*,
1687 const void*, int ) const { return 0; }
1694 int operator()( const Mat& _src, void* _dst, const short* XY,
1695 const ushort* FXY, const void* _wtab, int width ) const
1697 int cn = _src.channels();
1699 if( (cn != 1 && cn != 3 && cn != 4) || !checkHardwareSupport(CV_CPU_SSE2) )
1702 const uchar *S0 = _src.data, *S1 = _src.data + _src.step;
1703 const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
1704 uchar* D = (uchar*)_dst;
1705 int x = 0, sstep = (int)_src.step;
1706 __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE/2);
1707 __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << 16));
1708 __m128i z = _mm_setzero_si128();
1709 int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
1713 for( ; x <= width - 8; x += 8 )
1715 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1716 __m128i xy1 = _mm_loadu_si128( (const __m128i*)(XY + x*2 + 8));
1717 __m128i v0, v1, v2, v3, a0, a1, b0, b1;
1720 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1721 xy1 = _mm_madd_epi16( xy1, xy2ofs );
1722 _mm_store_si128( (__m128i*)iofs0, xy0 );
1723 _mm_store_si128( (__m128i*)iofs1, xy1 );
1725 i0 = *(ushort*)(S0 + iofs0[0]) + (*(ushort*)(S0 + iofs0[1]) << 16);
1726 i1 = *(ushort*)(S0 + iofs0[2]) + (*(ushort*)(S0 + iofs0[3]) << 16);
1727 v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1728 i0 = *(ushort*)(S1 + iofs0[0]) + (*(ushort*)(S1 + iofs0[1]) << 16);
1729 i1 = *(ushort*)(S1 + iofs0[2]) + (*(ushort*)(S1 + iofs0[3]) << 16);
1730 v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1731 v0 = _mm_unpacklo_epi8(v0, z);
1732 v1 = _mm_unpacklo_epi8(v1, z);
1734 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x]*4)),
1735 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+1]*4)));
1736 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+2]*4)),
1737 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+3]*4)));
1738 b0 = _mm_unpacklo_epi64(a0, a1);
1739 b1 = _mm_unpackhi_epi64(a0, a1);
1740 v0 = _mm_madd_epi16(v0, b0);
1741 v1 = _mm_madd_epi16(v1, b1);
1742 v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);
1744 i0 = *(ushort*)(S0 + iofs1[0]) + (*(ushort*)(S0 + iofs1[1]) << 16);
1745 i1 = *(ushort*)(S0 + iofs1[2]) + (*(ushort*)(S0 + iofs1[3]) << 16);
1746 v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1747 i0 = *(ushort*)(S1 + iofs1[0]) + (*(ushort*)(S1 + iofs1[1]) << 16);
1748 i1 = *(ushort*)(S1 + iofs1[2]) + (*(ushort*)(S1 + iofs1[3]) << 16);
1749 v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
1750 v2 = _mm_unpacklo_epi8(v2, z);
1751 v3 = _mm_unpacklo_epi8(v3, z);
1753 a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+4]*4)),
1754 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+5]*4)));
1755 a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(wtab+FXY[x+6]*4)),
1756 _mm_loadl_epi64((__m128i*)(wtab+FXY[x+7]*4)));
1757 b0 = _mm_unpacklo_epi64(a0, a1);
1758 b1 = _mm_unpackhi_epi64(a0, a1);
1759 v2 = _mm_madd_epi16(v2, b0);
1760 v3 = _mm_madd_epi16(v3, b1);
1761 v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);
1763 v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
1764 v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
1765 v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
1766 _mm_storel_epi64( (__m128i*)(D + x), v0 );
1771 for( ; x <= width - 5; x += 4, D += 12 )
1773 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1774 __m128i u0, v0, u1, v1;
1776 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1777 _mm_store_si128( (__m128i*)iofs0, xy0 );
1778 const __m128i *w0, *w1;
1779 w0 = (const __m128i*)(wtab + FXY[x]*16);
1780 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
1782 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
1783 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 3)));
1784 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
1785 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 3)));
1786 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
1787 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 3)));
1788 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
1789 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 3)));
1790 u0 = _mm_unpacklo_epi8(u0, z);
1791 v0 = _mm_unpacklo_epi8(v0, z);
1792 u1 = _mm_unpacklo_epi8(u1, z);
1793 v1 = _mm_unpacklo_epi8(v1, z);
1794 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1795 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1796 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1797 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1798 u0 = _mm_slli_si128(u0, 4);
1799 u0 = _mm_packs_epi32(u0, u1);
1800 u0 = _mm_packus_epi16(u0, u0);
1801 _mm_storel_epi64((__m128i*)D, _mm_srli_si128(u0,1));
1803 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
1804 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
1806 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
1807 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 3)));
1808 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
1809 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 3)));
1810 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
1811 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 3)));
1812 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
1813 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 3)));
1814 u0 = _mm_unpacklo_epi8(u0, z);
1815 v0 = _mm_unpacklo_epi8(v0, z);
1816 u1 = _mm_unpacklo_epi8(u1, z);
1817 v1 = _mm_unpacklo_epi8(v1, z);
1818 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1819 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1820 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1821 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1822 u0 = _mm_slli_si128(u0, 4);
1823 u0 = _mm_packs_epi32(u0, u1);
1824 u0 = _mm_packus_epi16(u0, u0);
1825 _mm_storel_epi64((__m128i*)(D + 6), _mm_srli_si128(u0,1));
1830 for( ; x <= width - 4; x += 4, D += 16 )
1832 __m128i xy0 = _mm_loadu_si128( (const __m128i*)(XY + x*2));
1833 __m128i u0, v0, u1, v1;
1835 xy0 = _mm_madd_epi16( xy0, xy2ofs );
1836 _mm_store_si128( (__m128i*)iofs0, xy0 );
1837 const __m128i *w0, *w1;
1838 w0 = (const __m128i*)(wtab + FXY[x]*16);
1839 w1 = (const __m128i*)(wtab + FXY[x+1]*16);
1841 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[0])),
1842 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[0] + 4)));
1843 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[0])),
1844 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[0] + 4)));
1845 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[1])),
1846 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[1] + 4)));
1847 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[1])),
1848 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[1] + 4)));
1849 u0 = _mm_unpacklo_epi8(u0, z);
1850 v0 = _mm_unpacklo_epi8(v0, z);
1851 u1 = _mm_unpacklo_epi8(u1, z);
1852 v1 = _mm_unpacklo_epi8(v1, z);
1853 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1854 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1855 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1856 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1857 u0 = _mm_packs_epi32(u0, u1);
1858 u0 = _mm_packus_epi16(u0, u0);
1859 _mm_storel_epi64((__m128i*)D, u0);
1861 w0 = (const __m128i*)(wtab + FXY[x+2]*16);
1862 w1 = (const __m128i*)(wtab + FXY[x+3]*16);
1864 u0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[2])),
1865 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[2] + 4)));
1866 v0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[2])),
1867 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[2] + 4)));
1868 u1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S0 + iofs0[3])),
1869 _mm_cvtsi32_si128(*(int*)(S0 + iofs0[3] + 4)));
1870 v1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(int*)(S1 + iofs0[3])),
1871 _mm_cvtsi32_si128(*(int*)(S1 + iofs0[3] + 4)));
1872 u0 = _mm_unpacklo_epi8(u0, z);
1873 v0 = _mm_unpacklo_epi8(v0, z);
1874 u1 = _mm_unpacklo_epi8(u1, z);
1875 v1 = _mm_unpacklo_epi8(v1, z);
1876 u0 = _mm_add_epi32(_mm_madd_epi16(u0, w0[0]), _mm_madd_epi16(v0, w0[1]));
1877 u1 = _mm_add_epi32(_mm_madd_epi16(u1, w1[0]), _mm_madd_epi16(v1, w1[1]));
1878 u0 = _mm_srai_epi32(_mm_add_epi32(u0, delta), INTER_REMAP_COEF_BITS);
1879 u1 = _mm_srai_epi32(_mm_add_epi32(u1, delta), INTER_REMAP_COEF_BITS);
1880 u0 = _mm_packs_epi32(u0, u1);
1881 u0 = _mm_packus_epi16(u0, u0);
1882 _mm_storel_epi64((__m128i*)(D + 8), u0);
1892 typedef RemapNoVec RemapVec_8u;
1897 template<class CastOp, class VecOp, typename AT>
1898 static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
1899 const Mat& _fxy, const void* _wtab,
1900 int borderType, const Scalar& _borderValue )
1902 typedef typename CastOp::rtype T;
1903 typedef typename CastOp::type1 WT;
1904 Size ssize = _src.size(), dsize = _dst.size();
1905 int cn = _src.channels();
1906 const AT* wtab = (const AT*)_wtab;
1907 const T* S0 = (const T*)_src.data;
1908 size_t sstep = _src.step/sizeof(S0[0]);
1909 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
1910 saturate_cast<T>(_borderValue[1]),
1911 saturate_cast<T>(_borderValue[2]),
1912 saturate_cast<T>(_borderValue[3]));
1917 unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
1918 CV_Assert( cn <= 4 );
1920 if( _src.type() == CV_8UC3 )
1921 width1 = std::max(ssize.width-2, 0);
1924 for( dy = 0; dy < dsize.height; dy++ )
1926 T* D = (T*)(_dst.data + _dst.step*dy);
1927 const short* XY = (const short*)(_xy.data + _xy.step*dy);
1928 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
1930 bool prevInlier = false;
1932 for( dx = 0; dx <= dsize.width; dx++ )
1934 bool curInlier = dx < dsize.width ?
1935 (unsigned)XY[dx*2] < width1 &&
1936 (unsigned)XY[dx*2+1] < height1 : !prevInlier;
1937 if( curInlier == prevInlier )
1943 prevInlier = curInlier;
1947 int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
1953 for( ; dx < X1; dx++, D++ )
1955 int sx = XY[dx*2], sy = XY[dx*2+1];
1956 const AT* w = wtab + FXY[dx]*4;
1957 const T* S = S0 + sy*sstep + sx;
1958 *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
1962 for( ; dx < X1; dx++, D += 2 )
1964 int sx = XY[dx*2], sy = XY[dx*2+1];
1965 const AT* w = wtab + FXY[dx]*4;
1966 const T* S = S0 + sy*sstep + sx*2;
1967 WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
1968 WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
1969 D[0] = castOp(t0); D[1] = castOp(t1);
1972 for( ; dx < X1; dx++, D += 3 )
1974 int sx = XY[dx*2], sy = XY[dx*2+1];
1975 const AT* w = wtab + FXY[dx]*4;
1976 const T* S = S0 + sy*sstep + sx*3;
1977 WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
1978 WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
1979 WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
1980 D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
1983 for( ; dx < X1; dx++, D += 4 )
1985 int sx = XY[dx*2], sy = XY[dx*2+1];
1986 const AT* w = wtab + FXY[dx]*4;
1987 const T* S = S0 + sy*sstep + sx*4;
1988 WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
1989 WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
1990 D[0] = castOp(t0); D[1] = castOp(t1);
1991 t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
1992 t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
1993 D[2] = castOp(t0); D[3] = castOp(t1);
1998 if( borderType == BORDER_TRANSPARENT && cn != 3 )
2006 for( ; dx < X1; dx++, D++ )
2008 int sx = XY[dx*2], sy = XY[dx*2+1];
2009 if( borderType == BORDER_CONSTANT &&
2010 (sx >= ssize.width || sx+1 < 0 ||
2011 sy >= ssize.height || sy+1 < 0) )
2017 int sx0, sx1, sy0, sy1;
2019 const AT* w = wtab + FXY[dx]*4;
2020 if( borderType == BORDER_REPLICATE )
2022 sx0 = clip(sx, 0, ssize.width);
2023 sx1 = clip(sx+1, 0, ssize.width);
2024 sy0 = clip(sy, 0, ssize.height);
2025 sy1 = clip(sy+1, 0, ssize.height);
2026 v0 = S0[sy0*sstep + sx0];
2027 v1 = S0[sy0*sstep + sx1];
2028 v2 = S0[sy1*sstep + sx0];
2029 v3 = S0[sy1*sstep + sx1];
2033 sx0 = borderInterpolate(sx, ssize.width, borderType);
2034 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2035 sy0 = borderInterpolate(sy, ssize.height, borderType);
2036 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
2037 v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
2038 v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
2039 v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
2040 v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
2042 D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
2046 for( ; dx < X1; dx++, D += cn )
2048 int sx = XY[dx*2], sy = XY[dx*2+1], k;
2049 if( borderType == BORDER_CONSTANT &&
2050 (sx >= ssize.width || sx+1 < 0 ||
2051 sy >= ssize.height || sy+1 < 0) )
2053 for( k = 0; k < cn; k++ )
2058 int sx0, sx1, sy0, sy1;
2059 const T *v0, *v1, *v2, *v3;
2060 const AT* w = wtab + FXY[dx]*4;
2061 if( borderType == BORDER_REPLICATE )
2063 sx0 = clip(sx, 0, ssize.width);
2064 sx1 = clip(sx+1, 0, ssize.width);
2065 sy0 = clip(sy, 0, ssize.height);
2066 sy1 = clip(sy+1, 0, ssize.height);
2067 v0 = S0 + sy0*sstep + sx0*cn;
2068 v1 = S0 + sy0*sstep + sx1*cn;
2069 v2 = S0 + sy1*sstep + sx0*cn;
2070 v3 = S0 + sy1*sstep + sx1*cn;
2072 else if( borderType == BORDER_TRANSPARENT &&
2073 (unsigned)sx >= (unsigned)(ssize.width-1) &&
2074 (unsigned)sy >= (unsigned)(ssize.height-1))
2078 sx0 = borderInterpolate(sx, ssize.width, borderType);
2079 sx1 = borderInterpolate(sx+1, ssize.width, borderType);
2080 sy0 = borderInterpolate(sy, ssize.height, borderType);
2081 sy1 = borderInterpolate(sy+1, ssize.height, borderType);
2082 v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
2083 v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
2084 v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
2085 v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
2087 for( k = 0; k < cn; k++ )
2088 D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
2097 template<class CastOp, typename AT, int ONE>
2098 static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
2099 const Mat& _fxy, const void* _wtab,
2100 int borderType, const Scalar& _borderValue )
2102 typedef typename CastOp::rtype T;
2103 typedef typename CastOp::type1 WT;
2104 Size ssize = _src.size(), dsize = _dst.size();
2105 int cn = _src.channels();
2106 const AT* wtab = (const AT*)_wtab;
2107 const T* S0 = (const T*)_src.data;
2108 size_t sstep = _src.step/sizeof(S0[0]);
2109 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2110 saturate_cast<T>(_borderValue[1]),
2111 saturate_cast<T>(_borderValue[2]),
2112 saturate_cast<T>(_borderValue[3]));
2116 unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);
2118 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2120 dsize.width *= dsize.height;
2124 for( dy = 0; dy < dsize.height; dy++ )
2126 T* D = (T*)(_dst.data + _dst.step*dy);
2127 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2128 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2130 for( dx = 0; dx < dsize.width; dx++, D += cn )
2132 int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
2133 const AT* w = wtab + FXY[dx]*16;
2135 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2137 const T* S = S0 + sy*sstep + sx*cn;
2138 for( k = 0; k < cn; k++ )
2140 WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
2142 sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
2144 sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
2146 sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
2154 if( borderType == BORDER_TRANSPARENT &&
2155 ((unsigned)(sx+1) >= (unsigned)ssize.width ||
2156 (unsigned)(sy+1) >= (unsigned)ssize.height) )
2159 if( borderType == BORDER_CONSTANT &&
2160 (sx >= ssize.width || sx+4 <= 0 ||
2161 sy >= ssize.height || sy+4 <= 0))
2163 for( k = 0; k < cn; k++ )
2168 for( i = 0; i < 4; i++ )
2170 x[i] = borderInterpolate(sx + i, ssize.width, borderType)*cn;
2171 y[i] = borderInterpolate(sy + i, ssize.height, borderType);
2174 for( k = 0; k < cn; k++, S0++, w -= 16 )
2176 WT cv = cval[k], sum = cv*ONE;
2177 for( i = 0; i < 4; i++, w += 4 )
2180 const T* S = S0 + yi*sstep;
2184 sum += (S[x[0]] - cv)*w[0];
2186 sum += (S[x[1]] - cv)*w[1];
2188 sum += (S[x[2]] - cv)*w[2];
2190 sum += (S[x[3]] - cv)*w[3];
2201 template<class CastOp, typename AT, int ONE>
2202 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
2203 const Mat& _fxy, const void* _wtab,
2204 int borderType, const Scalar& _borderValue )
2206 typedef typename CastOp::rtype T;
2207 typedef typename CastOp::type1 WT;
2208 Size ssize = _src.size(), dsize = _dst.size();
2209 int cn = _src.channels();
2210 const AT* wtab = (const AT*)_wtab;
2211 const T* S0 = (const T*)_src.data;
2212 size_t sstep = _src.step/sizeof(S0[0]);
2213 Scalar_<T> cval(saturate_cast<T>(_borderValue[0]),
2214 saturate_cast<T>(_borderValue[1]),
2215 saturate_cast<T>(_borderValue[2]),
2216 saturate_cast<T>(_borderValue[3]));
2220 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
2222 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
2224 dsize.width *= dsize.height;
2228 for( dy = 0; dy < dsize.height; dy++ )
2230 T* D = (T*)(_dst.data + _dst.step*dy);
2231 const short* XY = (const short*)(_xy.data + _xy.step*dy);
2232 const ushort* FXY = (const ushort*)(_fxy.data + _fxy.step*dy);
2234 for( dx = 0; dx < dsize.width; dx++, D += cn )
2236 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
2237 const AT* w = wtab + FXY[dx]*64;
2238 const T* S = S0 + sy*sstep + sx*cn;
2240 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
2242 for( k = 0; k < cn; k++ )
2245 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
2246 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
2247 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
2256 if( borderType == BORDER_TRANSPARENT &&
2257 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
2258 (unsigned)(sy+3) >= (unsigned)ssize.height) )
2261 if( borderType == BORDER_CONSTANT &&
2262 (sx >= ssize.width || sx+8 <= 0 ||
2263 sy >= ssize.height || sy+8 <= 0))
2265 for( k = 0; k < cn; k++ )
2270 for( i = 0; i < 8; i++ )
2272 x[i] = borderInterpolate(sx + i, ssize.width, borderType)*cn;
2273 y[i] = borderInterpolate(sy + i, ssize.height, borderType);
2276 for( k = 0; k < cn; k++, S0++, w -= 64 )
2278 WT cv = cval[k], sum = cv*ONE;
2279 for( i = 0; i < 8; i++, w += 8 )
2282 const T* S = S0 + yi*sstep;
2286 sum += (S[x[0]] - cv)*w[0];
2288 sum += (S[x[1]] - cv)*w[1];
2290 sum += (S[x[2]] - cv)*w[2];
2292 sum += (S[x[3]] - cv)*w[3];
2294 sum += (S[x[4]] - cv)*w[4];
2296 sum += (S[x[5]] - cv)*w[5];
2298 sum += (S[x[6]] - cv)*w[6];
2300 sum += (S[x[7]] - cv)*w[7];
2311 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2312 int borderType, const Scalar& _borderValue );
2314 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
2315 const Mat& _fxy, const void* _wtab,
2316 int borderType, const Scalar& _borderValue);
2318 void remap( const Mat& src, Mat& dst, const Mat& map1, const Mat& map2,
2319 int interpolation, int borderType, const Scalar& borderValue )
2321 static RemapNNFunc nn_tab[] =
2323 remapNearest<uchar>, remapNearest<uchar>, remapNearest<ushort>, remapNearest<ushort>,
2324 remapNearest<int>, remapNearest<int>, remapNearest<double>, 0
2327 static RemapFunc linear_tab[] =
2329 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
2330 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
2331 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
2332 remapBilinear<Cast<float, float>, RemapNoVec, float>, 0, 0
2335 static RemapFunc cubic_tab[] =
2337 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
2338 remapBicubic<Cast<float, ushort>, float, 1>,
2339 remapBicubic<Cast<float, short>, float, 1>, 0,
2340 remapBicubic<Cast<float, float>, float, 1>, 0, 0
2343 static RemapFunc lanczos4_tab[] =
2345 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
2346 remapLanczos4<Cast<float, ushort>, float, 1>,
2347 remapLanczos4<Cast<float, short>, float, 1>, 0,
2348 remapLanczos4<Cast<float, float>, float, 1>, 0, 0
2351 CV_Assert( (!map2.data || map2.size() == map1.size()));
2352 dst.create( map1.size(), src.type() );
2353 CV_Assert(dst.data != src.data);
2355 int depth = src.depth(), map_depth = map1.depth();
2356 RemapNNFunc nnfunc = 0;
2357 RemapFunc ifunc = 0;
2358 const void* ctab = 0;
2359 bool fixpt = depth == CV_8U;
2360 bool planar_input = false;
2362 if( interpolation == INTER_NEAREST )
2364 nnfunc = nn_tab[depth];
2365 CV_Assert( nnfunc != 0 );
2367 if( map1.type() == CV_16SC2 && !map2.data ) // the data is already in the right format
2369 nnfunc( src, dst, map1, borderType, borderValue );
2375 if( interpolation == INTER_AREA )
2376 interpolation = INTER_LINEAR;
2378 if( interpolation == INTER_LINEAR )
2379 ifunc = linear_tab[depth];
2380 else if( interpolation == INTER_CUBIC )
2381 ifunc = cubic_tab[depth];
2382 else if( interpolation == INTER_LANCZOS4 )
2383 ifunc = lanczos4_tab[depth];
2385 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
2386 CV_Assert( ifunc != 0 );
2387 ctab = initInterTab2D( interpolation, fixpt );
2390 const Mat *m1 = &map1, *m2 = &map2;
2392 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
2393 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
2395 if( map1.type() != CV_16SC2 )
2399 ifunc( src, dst, *m1, *m2, ctab, borderType, borderValue );
2405 CV_Assert( (map1.type() == CV_32FC2 && !map2.data) ||
2406 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
2407 planar_input = map1.channels() == 1;
2411 const int buf_size = 1 << 14;
2412 int brows0 = std::min(128, dst.rows);
2413 int bcols0 = std::min(buf_size/brows0, dst.cols);
2414 brows0 = std::min(buf_size/bcols0, dst.rows);
2416 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
2419 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
2421 _bufa.create(brows0, bcols0, CV_16UC1);
2423 for( y = 0; y < dst.rows; y += brows0 )
2425 for( x = 0; x < dst.cols; x += bcols0 )
2427 int brows = std::min(brows0, dst.rows - y);
2428 int bcols = std::min(bcols0, dst.cols - x);
2429 Mat dpart(dst, Rect(x, y, bcols, brows));
2430 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
2434 if( map_depth != CV_32F )
2436 for( y1 = 0; y1 < brows; y1++ )
2438 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2439 const short* sXY = (const short*)(m1->data + m1->step*(y+y1)) + x*2;
2440 const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
2442 for( x1 = 0; x1 < bcols; x1++ )
2444 int a = sA[x1] & (INTER_TAB_SIZE2-1);
2445 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
2446 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
2450 else if( !planar_input )
2451 map1(Rect(0,0,bcols,brows)).convertTo(bufxy, bufxy.depth());
2454 for( y1 = 0; y1 < brows; y1++ )
2456 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2457 const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
2458 const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
2464 for( ; x1 <= bcols - 8; x1 += 8 )
2466 __m128 fx0 = _mm_loadu_ps(sX + x1);
2467 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2468 __m128 fy0 = _mm_loadu_ps(sY + x1);
2469 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2470 __m128i ix0 = _mm_cvtps_epi32(fx0);
2471 __m128i ix1 = _mm_cvtps_epi32(fx1);
2472 __m128i iy0 = _mm_cvtps_epi32(fy0);
2473 __m128i iy1 = _mm_cvtps_epi32(fy1);
2474 ix0 = _mm_packs_epi32(ix0, ix1);
2475 iy0 = _mm_packs_epi32(iy0, iy1);
2476 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2477 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2478 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2479 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
2484 for( ; x1 < bcols; x1++ )
2486 XY[x1*2] = saturate_cast<short>(sX[x1]);
2487 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
2491 nnfunc( src, dpart, bufxy, borderType, borderValue );
2495 Mat bufa(_bufa, Rect(0,0,bcols, brows));
2496 for( y1 = 0; y1 < brows; y1++ )
2498 short* XY = (short*)(bufxy.data + bufxy.step*y1);
2499 ushort* A = (ushort*)(bufa.data + bufa.step*y1);
2503 const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
2504 const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
2510 __m128 scale = _mm_set1_ps((float)INTER_TAB_SIZE);
2511 __m128i mask = _mm_set1_epi32(INTER_TAB_SIZE-1);
2512 for( ; x1 <= bcols - 8; x1 += 8 )
2514 __m128 fx0 = _mm_loadu_ps(sX + x1);
2515 __m128 fx1 = _mm_loadu_ps(sX + x1 + 4);
2516 __m128 fy0 = _mm_loadu_ps(sY + x1);
2517 __m128 fy1 = _mm_loadu_ps(sY + x1 + 4);
2518 __m128i ix0 = _mm_cvtps_epi32(_mm_mul_ps(fx0, scale));
2519 __m128i ix1 = _mm_cvtps_epi32(_mm_mul_ps(fx1, scale));
2520 __m128i iy0 = _mm_cvtps_epi32(_mm_mul_ps(fy0, scale));
2521 __m128i iy1 = _mm_cvtps_epi32(_mm_mul_ps(fy1, scale));
2522 __m128i mx0 = _mm_and_si128(ix0, mask);
2523 __m128i mx1 = _mm_and_si128(ix1, mask);
2524 __m128i my0 = _mm_and_si128(iy0, mask);
2525 __m128i my1 = _mm_and_si128(iy1, mask);
2526 mx0 = _mm_packs_epi32(mx0, mx1);
2527 my0 = _mm_packs_epi32(my0, my1);
2528 my0 = _mm_slli_epi16(my0, INTER_BITS);
2529 mx0 = _mm_or_si128(mx0, my0);
2530 _mm_storeu_si128((__m128i*)(A + x1), mx0);
2531 ix0 = _mm_srai_epi32(ix0, INTER_BITS);
2532 ix1 = _mm_srai_epi32(ix1, INTER_BITS);
2533 iy0 = _mm_srai_epi32(iy0, INTER_BITS);
2534 iy1 = _mm_srai_epi32(iy1, INTER_BITS);
2535 ix0 = _mm_packs_epi32(ix0, ix1);
2536 iy0 = _mm_packs_epi32(iy0, iy1);
2537 ix1 = _mm_unpacklo_epi16(ix0, iy0);
2538 iy1 = _mm_unpackhi_epi16(ix0, iy0);
2539 _mm_storeu_si128((__m128i*)(XY + x1*2), ix1);
2540 _mm_storeu_si128((__m128i*)(XY + x1*2 + 8), iy1);
2545 for( ; x1 < bcols; x1++ )
2547 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
2548 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
2549 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
2550 XY[x1*2] = (short)(sx >> INTER_BITS);
2551 XY[x1*2+1] = (short)(sy >> INTER_BITS);
2557 const float* sXY = (const float*)(map1.data + map1.step*(y+y1)) + x*2;
2559 for( x1 = 0; x1 < bcols; x1++ )
2561 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
2562 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
2563 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
2564 XY[x1*2] = (short)(sx >> INTER_BITS);
2565 XY[x1*2+1] = (short)(sy >> INTER_BITS);
2570 ifunc(src, dpart, bufxy, bufa, ctab, borderType, borderValue);
2576 void convertMaps( const Mat& map1, const Mat& map2, Mat& dstmap1, Mat& dstmap2,
2577 int dstm1type, bool nninterpolate )
2579 Size size = map1.size();
2580 const Mat *m1 = &map1, *m2 = &map2;
2581 int m1type = m1->type(), m2type = m2->type();
2583 CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
2584 (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
2585 (m1type == CV_32FC1 && m2type == CV_32FC1) ||
2586 (m1type == CV_32FC2 && !m2->data) );
2588 if( m2type == CV_16SC2 )
2590 std::swap( m1, m2 );
2591 std::swap( m1type, m2type );
2594 if( dstm1type <= 0 )
2595 dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
2596 CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
2597 dstmap1.create( size, dstm1type );
2598 if( !nninterpolate && dstm1type != CV_32FC2 )
2599 dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
2603 if( m1type == dstm1type || (nninterpolate &&
2604 ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
2605 (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
2607 m1->convertTo( dstmap1, dstmap1.type() );
2608 if( dstmap2.data && dstmap2.type() == m2->type() )
2609 m2->copyTo( dstmap2 );
2613 if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
2615 Mat vdata[] = { *m1, *m2 };
2616 merge( vdata, 2, dstmap1 );
2620 if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
2622 Mat mv[] = { dstmap1, dstmap2 };
2627 if( m1->isContinuous() && (!m2->data || m2->isContinuous()) &&
2628 dstmap1.isContinuous() && (!dstmap2.data || dstmap2.isContinuous()) )
2630 size.width *= size.height;
2634 const float scale = 1.f/INTER_TAB_SIZE;
2636 for( y = 0; y < size.height; y++ )
2638 const float* src1f = (const float*)(m1->data + m1->step*y);
2639 const float* src2f = (const float*)(m2->data + m2->step*y);
2640 const short* src1 = (const short*)src1f;
2641 const ushort* src2 = (const ushort*)src2f;
2643 float* dst1f = (float*)(dstmap1.data + dstmap1.step*y);
2644 float* dst2f = (float*)(dstmap2.data + dstmap2.step*y);
2645 short* dst1 = (short*)dst1f;
2646 ushort* dst2 = (ushort*)dst2f;
2648 if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
2651 for( x = 0; x < size.width; x++ )
2653 dst1[x*2] = saturate_cast<short>(src1f[x]);
2654 dst1[x*2+1] = saturate_cast<short>(src2f[x]);
2657 for( x = 0; x < size.width; x++ )
2659 int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
2660 int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
2661 dst1[x*2] = (short)(ix >> INTER_BITS);
2662 dst1[x*2+1] = (short)(iy >> INTER_BITS);
2663 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2666 else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
2669 for( x = 0; x < size.width; x++ )
2671 dst1[x*2] = saturate_cast<short>(src1f[x*2]);
2672 dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
2675 for( x = 0; x < size.width; x++ )
2677 int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
2678 int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
2679 dst1[x*2] = (short)(ix >> INTER_BITS);
2680 dst1[x*2+1] = (short)(iy >> INTER_BITS);
2681 dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
2684 else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
2686 for( x = 0; x < size.width; x++ )
2688 int fxy = src2 ? src2[x] : 0;
2689 dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2690 dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2693 else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
2695 for( x = 0; x < size.width; x++ )
2697 int fxy = src2 ? src2[x] : 0;
2698 dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
2699 dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
2703 CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
2708 void warpAffine( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
2709 int flags, int borderType, const Scalar& borderValue )
2711 dst.create( dsize, src.type() );
2712 CV_Assert( dst.data != src.data );
2714 const int BLOCK_SZ = 64;
2715 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2717 Mat matM(2, 3, CV_64F, M);
2718 int interpolation = flags & INTER_MAX;
2719 if( interpolation == INTER_AREA )
2720 interpolation = INTER_LINEAR;
2722 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
2723 M0.convertTo(matM, matM.type());
2725 if( !(flags & WARP_INVERSE_MAP) )
2727 double D = M[0]*M[4] - M[1]*M[3];
2728 D = D != 0 ? 1./D : 0;
2729 double A11 = M[4]*D, A22=M[0]*D;
2730 M[0] = A11; M[1] *= -D;
2731 M[3] *= -D; M[4] = A22;
2732 double b1 = -M[0]*M[2] - M[1]*M[5];
2733 double b2 = -M[3]*M[2] - M[4]*M[5];
2734 M[2] = b1; M[5] = b2;
2737 int x, y, x1, y1, width = dst.cols, height = dst.rows;
2738 AutoBuffer<int> _abdelta(width*2);
2739 int* adelta = &_abdelta[0], *bdelta = adelta + width;
2740 const int AB_BITS = MAX(10, (int)INTER_BITS);
2741 const int AB_SCALE = 1 << AB_BITS;
2742 int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2;
2744 bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
2747 for( x = 0; x < width; x++ )
2749 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
2750 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
2753 int bh0 = std::min(BLOCK_SZ/2, height);
2754 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
2755 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
2757 for( y = 0; y < height; y += bh0 )
2759 for( x = 0; x < width; x += bw0 )
2761 int bw = std::min( bw0, width - x);
2762 int bh = std::min( bh0, height - y);
2764 Mat _XY(bh, bw, CV_16SC2, XY), matA;
2765 Mat dpart(dst, Rect(x, y, bw, bh));
2767 for( y1 = 0; y1 < bh; y1++ )
2769 short* xy = XY + y1*bw*2;
2770 int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
2771 int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
2773 if( interpolation == INTER_NEAREST )
2774 for( x1 = 0; x1 < bw; x1++ )
2776 int X = (X0 + adelta[x+x1]) >> AB_BITS;
2777 int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
2778 xy[x1*2] = (short)X;
2779 xy[x1*2+1] = (short)Y;
2783 short* alpha = A + y1*bw;
2788 __m128i fxy_mask = _mm_set1_epi32(INTER_TAB_SIZE - 1);
2789 __m128i XX = _mm_set1_epi32(X0), YY = _mm_set1_epi32(Y0);
2790 for( ; x1 <= bw - 8; x1 += 8 )
2792 __m128i tx0, tx1, ty0, ty1;
2793 tx0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1)), XX);
2794 ty0 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1)), YY);
2795 tx1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(adelta + x + x1 + 4)), XX);
2796 ty1 = _mm_add_epi32(_mm_loadu_si128((const __m128i*)(bdelta + x + x1 + 4)), YY);
2798 tx0 = _mm_srai_epi32(tx0, AB_BITS - INTER_BITS);
2799 ty0 = _mm_srai_epi32(ty0, AB_BITS - INTER_BITS);
2800 tx1 = _mm_srai_epi32(tx1, AB_BITS - INTER_BITS);
2801 ty1 = _mm_srai_epi32(ty1, AB_BITS - INTER_BITS);
2803 __m128i fx_ = _mm_packs_epi32(_mm_and_si128(tx0, fxy_mask),
2804 _mm_and_si128(tx1, fxy_mask));
2805 __m128i fy_ = _mm_packs_epi32(_mm_and_si128(ty0, fxy_mask),
2806 _mm_and_si128(ty1, fxy_mask));
2807 tx0 = _mm_packs_epi32(_mm_srai_epi32(tx0, INTER_BITS),
2808 _mm_srai_epi32(tx1, INTER_BITS));
2809 ty0 = _mm_packs_epi32(_mm_srai_epi32(ty0, INTER_BITS),
2810 _mm_srai_epi32(ty1, INTER_BITS));
2811 fx_ = _mm_add_epi16(fx_, _mm_slli_epi16(fy_, INTER_BITS));
2813 _mm_storeu_si128((__m128i*)(xy + x1*2), _mm_unpacklo_epi16(tx0, ty0));
2814 _mm_storeu_si128((__m128i*)(xy + x1*2 + 8), _mm_unpackhi_epi16(tx0, ty0));
2815 _mm_storeu_si128((__m128i*)(alpha + x1), fx_);
2819 for( ; x1 < bw; x1++ )
2821 int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
2822 int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
2823 xy[x1*2] = (short)(X >> INTER_BITS);
2824 xy[x1*2+1] = (short)(Y >> INTER_BITS);
2825 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
2826 (X & (INTER_TAB_SIZE-1)));
2831 if( interpolation == INTER_NEAREST )
2832 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
2835 Mat matA(bh, bw, CV_16U, A);
2836 remap( src, dpart, _XY, matA, interpolation, borderType, borderValue );
2843 void warpPerspective( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
2844 int flags, int borderType, const Scalar& borderValue )
2846 dst.create( dsize, src.type() );
2847 CV_Assert( dst.data != src.data );
2849 const int BLOCK_SZ = 32;
2850 short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
2852 Mat matM(3, 3, CV_64F, M);
2853 int interpolation = flags & INTER_MAX;
2854 if( interpolation == INTER_AREA )
2855 interpolation = INTER_LINEAR;
2857 CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
2858 M0.convertTo(matM, matM.type());
2860 if( !(flags & WARP_INVERSE_MAP) )
2863 int x, y, x1, y1, width = dst.cols, height = dst.rows;
2865 int bh0 = std::min(BLOCK_SZ/2, height);
2866 int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
2867 bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
2869 for( y = 0; y < height; y += bh0 )
2871 for( x = 0; x < width; x += bw0 )
2873 int bw = std::min( bw0, width - x);
2874 int bh = std::min( bh0, height - y);
2876 Mat _XY(bh, bw, CV_16SC2, XY), matA;
2877 Mat dpart(dst, Rect(x, y, bw, bh));
2879 for( y1 = 0; y1 < bh; y1++ )
2881 short* xy = XY + y1*bw*2;
2882 double X0 = M[0]*x + M[1]*(y + y1) + M[2];
2883 double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
2884 double W0 = M[6]*x + M[7]*(y + y1) + M[8];
2886 if( interpolation == INTER_NEAREST )
2887 for( x1 = 0; x1 < bw; x1++ )
2889 double W = W0 + M[6]*x1;
2891 int X = saturate_cast<int>((X0 + M[0]*x1)*W);
2892 int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
2893 xy[x1*2] = (short)X;
2894 xy[x1*2+1] = (short)Y;
2898 short* alpha = A + y1*bw;
2899 for( x1 = 0; x1 < bw; x1++ )
2901 double W = W0 + M[6]*x1;
2902 W = W ? INTER_TAB_SIZE/W : 0;
2903 int X = saturate_cast<int>((X0 + M[0]*x1)*W);
2904 int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
2905 xy[x1*2] = (short)(X >> INTER_BITS);
2906 xy[x1*2+1] = (short)(Y >> INTER_BITS);
2907 alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
2908 (X & (INTER_TAB_SIZE-1)));
2913 if( interpolation == INTER_NEAREST )
2914 remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
2917 Mat matA(bh, bw, CV_16U, A);
2918 remap( src, dpart, _XY, matA, interpolation, borderType, borderValue );
2925 Mat getRotationMatrix2D( Point2f center, double angle, double scale )
2928 double alpha = cos(angle)*scale;
2929 double beta = sin(angle)*scale;
2931 Mat M(2, 3, CV_64F);
2932 double* m = (double*)M.data;
2936 m[2] = (1-alpha)*center.x - beta*center.y;
2939 m[5] = beta*center.x + (1-alpha)*center.y;
2944 /* Calculates coefficients of perspective transformation
2945 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
2947 * c00*xi + c01*yi + c02
2948 * ui = ---------------------
2949 * c20*xi + c21*yi + c22
2951 * c10*xi + c11*yi + c12
2952 * vi = ---------------------
2953 * c20*xi + c21*yi + c22
2955 * Coefficients are calculated by solving linear system:
2956 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
2957 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
2958 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
2959 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
2960 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
2961 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
2962 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
2963 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
2966 * cij - matrix coefficients, c22 = 1
2968 Mat getPerspectiveTransform( const Point2f src[], const Point2f dst[] )
2970 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.data);
2971 double a[8][8], b[8];
2972 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
2974 for( int i = 0; i < 4; ++i )
2976 a[i][0] = a[i+4][3] = src[i].x;
2977 a[i][1] = a[i+4][4] = src[i].y;
2978 a[i][2] = a[i+4][5] = 1;
2979 a[i][3] = a[i][4] = a[i][5] =
2980 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
2981 a[i][6] = -src[i].x*dst[i].x;
2982 a[i][7] = -src[i].y*dst[i].x;
2983 a[i+4][6] = -src[i].x*dst[i].y;
2984 a[i+4][7] = -src[i].y*dst[i].y;
2989 solve( A, B, X, DECOMP_SVD );
2990 ((double*)M.data)[8] = 1.;
2995 /* Calculates coefficients of affine transformation
2996 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
2998 * ui = c00*xi + c01*yi + c02
3000 * vi = c10*xi + c11*yi + c12
3002 * Coefficients are calculated by solving linear system:
3003 * / x0 y0 1 0 0 0 \ /c00\ /u0\
3004 * | x1 y1 1 0 0 0 | |c01| |u1|
3005 * | x2 y2 1 0 0 0 | |c02| |u2|
3006 * | 0 0 0 x0 y0 1 | |c10| |v0|
3007 * | 0 0 0 x1 y1 1 | |c11| |v1|
3008 * \ 0 0 0 x2 y2 1 / |c12| |v2|
3011 * cij - matrix coefficients
3013 Mat getAffineTransform( const Point2f src[], const Point2f dst[] )
3015 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.data);
3016 double a[6*6], b[6];
3017 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
3019 for( int i = 0; i < 3; i++ )
3023 a[j] = a[k+3] = src[i].x;
3024 a[j+1] = a[k+4] = src[i].y;
3025 a[j+2] = a[k+5] = 1;
3026 a[j+3] = a[j+4] = a[j+5] = 0;
3027 a[k] = a[k+1] = a[k+2] = 0;
3029 b[i*2+1] = dst[i].y;
3036 void invertAffineTransform(const Mat& matM, Mat& _iM)
3038 CV_Assert(matM.rows == 2 && matM.cols == 3);
3039 _iM.create(2, 3, matM.type());
3040 if( matM.type() == CV_32F )
3042 const float* M = (const float*)matM.data;
3043 float* iM = (float*)_iM.data;
3044 int step = matM.step/sizeof(M[0]), istep = _iM.step/sizeof(iM[0]);
3046 double D = M[0]*M[step+1] - M[1]*M[step];
3047 D = D != 0 ? 1./D : 0;
3048 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
3049 double b1 = -A11*M[2] - A12*M[step+2];
3050 double b2 = -A21*M[2] - A22*M[step+2];
3052 iM[0] = (float)A11; iM[1] = (float)A12; iM[2] = (float)b1;
3053 iM[istep] = (float)A21; iM[istep+1] = (float)A22; iM[istep+2] = (float)b2;
3055 else if( matM.type() == CV_64F )
3057 const double* M = (const double*)matM.data;
3058 double* iM = (double*)_iM.data;
3059 int step = matM.step/sizeof(M[0]), istep = _iM.step/sizeof(iM[0]);
3061 double D = M[0]*M[step+1] - M[1]*M[step];
3062 D = D != 0 ? 1./D : 0;
3063 double A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
3064 double b1 = -A11*M[2] - A12*M[step+2];
3065 double b2 = -A21*M[2] - A22*M[step+2];
3067 iM[0] = A11; iM[1] = A12; iM[2] = b1;
3068 iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
3071 CV_Error( CV_StsUnsupportedFormat, "" );
3077 cvResize( const CvArr* srcarr, CvArr* dstarr, int method )
3079 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3080 CV_Assert( src.type() == dst.type() );
3081 cv::resize( src, dst, dst.size(), (double)dst.cols/src.cols,
3082 (double)dst.rows/src.rows, method );
3087 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3088 int flags, CvScalar fillval )
3090 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3091 cv::Mat matrix = cv::cvarrToMat(marr);
3092 CV_Assert( src.type() == dst.type() );
3093 cv::warpAffine( src, dst, matrix, dst.size(), flags,
3094 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3099 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3100 int flags, CvScalar fillval )
3102 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3103 cv::Mat matrix = cv::cvarrToMat(marr);
3104 CV_Assert( src.type() == dst.type() );
3105 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
3106 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3111 cvRemap( const CvArr* srcarr, CvArr* dstarr,
3112 const CvArr* _mapx, const CvArr* _mapy,
3113 int flags, CvScalar fillval )
3115 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
3116 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
3117 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
3118 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
3119 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3121 CV_Assert( dst0.data == dst.data );
3126 cv2DRotationMatrix( CvPoint2D32f center, double angle,
3127 double scale, CvMat* matrix )
3129 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
3130 CV_Assert( M.size() == M.size() );
3131 M.convertTo(M0, M0.type());
3137 cvGetPerspectiveTransform( const CvPoint2D32f* src,
3138 const CvPoint2D32f* dst,
3141 cv::Mat M0 = cv::cvarrToMat(matrix),
3142 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3143 CV_Assert( M.size() == M.size() );
3144 M.convertTo(M0, M0.type());
3150 cvGetAffineTransform( const CvPoint2D32f* src,
3151 const CvPoint2D32f* dst,
3154 cv::Mat M0 = cv::cvarrToMat(matrix),
3155 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3156 CV_Assert( M.size() == M0.size() );
3157 M.convertTo(M0, M0.type());
3163 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
3165 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
3166 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
3169 map2 = cv::cvarrToMat(arr2);
3172 dstmap2 = cv::cvarrToMat(dstarr2);
3173 if( dstmap2.type() == CV_16SC1 )
3174 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.data, dstmap2.step);
3177 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
3180 /****************************************************************************************\
3181 * Log-Polar Transform *
3182 \****************************************************************************************/
3184 /* now it is done via Remap; more correct implementation should use
3185 some super-sampling technique outside of the "fovea" circle */
3187 cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
3188 CvPoint2D32f center, double M, int flags )
3190 cv::Ptr<CvMat> mapx, mapy;
3192 CvMat srcstub, *src = cvGetMat(srcarr, &srcstub);
3193 CvMat dststub, *dst = cvGetMat(dstarr, &dststub);
3194 CvSize ssize, dsize;
3196 if( !CV_ARE_TYPES_EQ( src, dst ))
3197 CV_Error( CV_StsUnmatchedFormats, "" );
3200 CV_Error( CV_StsOutOfRange, "M should be >0" );
3202 ssize = cvGetMatSize(src);
3203 dsize = cvGetMatSize(dst);
3205 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
3206 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
3208 if( !(flags & CV_WARP_INVERSE_MAP) )
3211 cv::AutoBuffer<double> _exp_tab(dsize.width);
3212 double* exp_tab = _exp_tab;
3214 for( rho = 0; rho < dst->width; rho++ )
3215 exp_tab[rho] = std::exp(rho/M);
3217 for( phi = 0; phi < dsize.height; phi++ )
3219 double cp = cos(phi*2*CV_PI/(dsize.height-1));
3220 double sp = sin(phi*2*CV_PI/(dsize.height-1));
3221 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
3222 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
3224 for( rho = 0; rho < dsize.width; rho++ )
3226 double r = exp_tab[rho];
3227 double x = r*cp + center.x;
3228 double y = r*sp + center.y;
3238 CvMat bufx, bufy, bufp, bufa;
3239 double ascale = (ssize.height-1)/(2*CV_PI);
3240 cv::AutoBuffer<float> _buf(4*dsize.width);
3243 bufx = cvMat( 1, dsize.width, CV_32F, buf );
3244 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
3245 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
3246 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
3248 for( x = 0; x < dsize.width; x++ )
3249 bufx.data.fl[x] = (float)x - center.x;
3251 for( y = 0; y < dsize.height; y++ )
3253 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
3254 float* my = (float*)(mapy->data.ptr + y*mapy->step);
3256 for( x = 0; x < dsize.width; x++ )
3257 bufy.data.fl[x] = (float)y - center.y;
3260 cvCartToPolar( &bufx, &bufy, &bufp, &bufa );
3262 for( x = 0; x < dsize.width; x++ )
3263 bufp.data.fl[x] += 1.f;
3265 cvLog( &bufp, &bufp );
3267 for( x = 0; x < dsize.width; x++ )
3269 double rho = bufp.data.fl[x]*M;
3270 double phi = bufa.data.fl[x]*ascale;
3276 for( x = 0; x < dsize.width; x++ )
3278 double xx = bufx.data.fl[x];
3279 double yy = bufy.data.fl[x];
3281 double p = log(sqrt(xx*xx + yy*yy) + 1.)*M;
3282 double a = atan2(yy,xx);
3294 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );
3298 /****************************************************************************************
3299 Linear-Polar Transform
3300 J.L. Blanco, Apr 2009
3301 ****************************************************************************************/
3303 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
3304 CvPoint2D32f center, double maxRadius, int flags )
3306 cv::Ptr<CvMat> mapx, mapy;
3308 CvMat srcstub, *src = (CvMat*)srcarr;
3309 CvMat dststub, *dst = (CvMat*)dstarr;
3310 CvSize ssize, dsize;
3312 src = cvGetMat( srcarr, &srcstub,0,0 );
3313 dst = cvGetMat( dstarr, &dststub,0,0 );
3315 if( !CV_ARE_TYPES_EQ( src, dst ))
3316 CV_Error( CV_StsUnmatchedFormats, "" );
3318 ssize.width = src->cols;
3319 ssize.height = src->rows;
3320 dsize.width = dst->cols;
3321 dsize.height = dst->rows;
3323 mapx = cvCreateMat( dsize.height, dsize.width, CV_32F );
3324 mapy = cvCreateMat( dsize.height, dsize.width, CV_32F );
3326 if( !(flags & CV_WARP_INVERSE_MAP) )
3330 for( phi = 0; phi < dsize.height; phi++ )
3332 double cp = cos(phi*2*CV_PI/(dsize.height-1));
3333 double sp = sin(phi*2*CV_PI/(dsize.height-1));
3334 float* mx = (float*)(mapx->data.ptr + phi*mapx->step);
3335 float* my = (float*)(mapy->data.ptr + phi*mapy->step);
3337 for( rho = 0; rho < dsize.width; rho++ )
3339 double r = maxRadius*(rho+1)/double(dsize.width-1);
3340 double x = r*cp + center.x;
3341 double y = r*sp + center.y;
3351 CvMat bufx, bufy, bufp, bufa;
3352 const double ascale = (ssize.height-1)/(2*CV_PI);
3353 const double pscale = (ssize.width-1)/maxRadius;
3355 cv::AutoBuffer<float> _buf(4*dsize.width);
3358 bufx = cvMat( 1, dsize.width, CV_32F, buf );
3359 bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width );
3360 bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 );
3361 bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 );
3363 for( x = 0; x < dsize.width; x++ )
3364 bufx.data.fl[x] = (float)x - center.x;
3366 for( y = 0; y < dsize.height; y++ )
3368 float* mx = (float*)(mapx->data.ptr + y*mapx->step);
3369 float* my = (float*)(mapy->data.ptr + y*mapy->step);
3371 for( x = 0; x < dsize.width; x++ )
3372 bufy.data.fl[x] = (float)y - center.y;
3374 cvCartToPolar( &bufx, &bufy, &bufp, &bufa, 0 );
3376 for( x = 0; x < dsize.width; x++ )
3377 bufp.data.fl[x] += 1.f;
3379 for( x = 0; x < dsize.width; x++ )
3381 double rho = bufp.data.fl[x]*pscale;
3382 double phi = bufa.data.fl[x]*ascale;
3389 cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) );