opencv/src/cv/cvstereosgbm.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 /*
  44  This is a variation of
  45  "Stereo Processing by Semiglobal Matching and Mutual Information"
  46  by Heiko Hirschmuller.
  47
  48  We match blocks rather than individual pixels, thus the algorithm is called
  49  SGBM (Semi-global block matching)
  50  */
  51
  52 #include "_cv.h"
  53 #include <limits.h>
  54
  55 namespace cv
  56 {
  57
  58 typedef uchar PixType;
  59 typedef short CostType;
  60 typedef short DispType;
  61
  62 enum { NR = 16, NR2 = NR/2 };
  63
  64 StereoSGBM::StereoSGBM()
  65 {
  66     minDisparity = numberOfDisparities = 0;
  67     SADWindowSize = 0;
  68     P1 = P2 = 0;
  69     disp12MaxDiff = 0;
  70     preFilterCap = 0;
  71     uniquenessRatio = 0;
  72     speckleWindowSize = 0;
  73     speckleRange = 0;
  74     fullDP = false;
  75 }
  76
  77
  78 StereoSGBM::StereoSGBM( int _minDisparity, int _numDisparities, int _SADWindowSize,
  79                    int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,
  80                    int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,
  81                    bool _fullDP )
  82 {
  83     minDisparity = _minDisparity;
  84     numberOfDisparities = _numDisparities;
  85     SADWindowSize = _SADWindowSize;
  86     P1 = _P1;
  87     P2 = _P2;
  88     disp12MaxDiff = _disp12MaxDiff;
  89     preFilterCap = _preFilterCap;
  90     uniquenessRatio = _uniquenessRatio;
  91     speckleWindowSize = _speckleWindowSize;
  92     speckleRange = _speckleRange;
  93     fullDP = _fullDP;
  94 }
  95
  96
  97 StereoSGBM::~StereoSGBM()
  98 {
  99 }
 100
 101 /*
 102  For each pixel row1[x], max(-maxD, 0) <= minX <= x < maxX <= width - max(0, -minD),
 103  and for each disparity minD<=d<maxD the function
 104  computes the cost (cost[(x-minX)*(maxD - minD) + (d - minD)]), depending on the difference between
 105  row1[x] and row2[x-d]. The subpixel algorithm from
 106  "Depth Discontinuities by Pixel-to-Pixel Stereo" by Stan Birchfield and C. Tomasi
 107  is used, hence the suffix BT.
 108
 109  the temporary buffer should contain width2*2 elements
 110  */
 111 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
 112                             int minD, int maxD, CostType* cost,
 113                             PixType* buffer, const PixType* tab,
 114                             int tabOfs, int ftzero )
 115 {
 116     int x, c, width = img1.cols, cn = img1.channels();
 117     int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
 118     int minX2 = max(minX1 - maxD, 0), maxX2 = min(maxX1 - minD, width);
 119     int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
 120     const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
 121     PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
 122
 123     tab += tabOfs;
 124
 125     for( c = 0; c < cn*2; c++ )
 126     {
 127         prow1[width*c] = prow1[width*c + width-1] =
 128         prow2[width*c] = prow2[width*c + width-1] = tab[0];
 129     }
 130
 131     int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
 132     int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
 133
 134     if( cn == 1 )
 135     {
 136         for( x = 1; x < width-1; x++ )
 137         {
 138             prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
 139             prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
 140
 141             prow1[x+width] = row1[x];
 142             prow2[width-1-x+width] = row2[x];
 143         }
 144     }
 145     else
 146     {
 147         for( x = 1; x < width-1; x++ )
 148         {
 149             prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
 150             prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
 151             prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
 152
 153             prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
 154             prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
 155             prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
 156
 157             prow1[x+width*3] = row1[x*3];
 158             prow1[x+width*4] = row1[x*3+1];
 159             prow1[x+width*5] = row1[x*3+2];
 160
 161             prow2[width-1-x+width*3] = row2[x*3];
 162             prow2[width-1-x+width*4] = row2[x*3+1];
 163             prow2[width-1-x+width*5] = row2[x*3+2];
 164         }
 165     }
 166
 167     memset( cost, 0, width1*D*sizeof(cost[0]) );
 168
 169     buffer -= minX2;
 170     cost -= minX1*D + minD; // simplify the cost indices inside the loop
 171
 172 #if CV_SSE2
 173     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
 174 #endif
 175
 176 #if 1
 177     for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
 178     {
 179         int diff_scale = c < cn ? 0 : 2;
 180
 181         // precompute
 182         //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
 183         //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
 184         for( x = minX2; x < maxX2; x++ )
 185         {
 186             int v = prow2[x];
 187             int vl = x > 0 ? (v + prow2[x-1])/2 : v;
 188             int vr = x < width-1 ? (v + prow2[x+1])/2 : v;
 189             int v0 = min(vl, vr); v0 = min(v0, v);
 190             int v1 = max(vl, vr); v1 = max(v1, v);
 191             buffer[x] = (PixType)v0;
 192             buffer[x + width2] = (PixType)v1;
 193         }
 194
 195         for( x = minX1; x < maxX1; x++ )
 196         {
 197             int u = prow1[x];
 198             int ul = x > 0 ? (u + prow1[x-1])/2 : u;
 199             int ur = x < width-1 ? (u + prow1[x+1])/2 : u;
 200             int u0 = min(ul, ur); u0 = min(u0, u);
 201             int u1 = max(ul, ur); u1 = max(u1, u);
 202
 203         #if CV_SSE2
 204             if( useSIMD )
 205             {
 206                 __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
 207                 __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
 208                 __m128i ds = _mm_cvtsi32_si128(diff_scale);
 209
 210                 for( int d = minD; d < maxD; d += 16 )
 211                 {
 212                     __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-x-1 + d));
 213                     __m128i _v0 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d));
 214                     __m128i _v1 = _mm_loadu_si128((const __m128i*)(buffer + width-x-1 + d + width2));
 215                     __m128i c0 = _mm_max_epu8(_mm_subs_epu8(_u, _v1), _mm_subs_epu8(_v0, _u));
 216                     __m128i c1 = _mm_max_epu8(_mm_subs_epu8(_v, _u1), _mm_subs_epu8(_u0, _v));
 217                     __m128i diff = _mm_min_epu8(c0, c1);
 218
 219                     c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
 220                     c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
 221
 222                     _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
 223                     _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
 224                 }
 225             }
 226             else
 227         #endif
 228             {
 229                 for( int d = minD; d < maxD; d++ )
 230                 {
 231                     int v = prow2[width-x-1 + d];
 232                     int v0 = buffer[width-x-1 + d];
 233                     int v1 = buffer[width-x-1 + d + width2];
 234                     int c0 = max(0, u - v1); c0 = max(c0, v0 - u);
 235                     int c1 = max(0, v - u1); c1 = max(c1, u0 - v);
 236
 237                     cost[x*D + d] = (CostType)(cost[x*D+d] + (min(c0, c1) >> diff_scale));
 238                 }
 239             }
 240         }
 241     }
 242 #else
 243     for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
 244     {
 245         for( x = minX1; x < maxX1; x++ )
 246         {
 247             int u = prow1[x];
 248         #if CV_SSE2
 249             if( useSIMD )
 250             {
 251                 __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
 252
 253                 for( int d = minD; d < maxD; d += 16 )
 254                 {
 255                     __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
 256                     __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
 257                     __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
 258                     __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
 259
 260                     _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
 261                     _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
 262                 }
 263             }
 264             else
 265         #endif
 266             {
 267                 for( int d = minD; d < maxD; d++ )
 268                 {
 269                     int v = prow2[width-1-x + d];
 270                     cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
 271                 }
 272             }
 273         }
 274     }
 275 #endif
 276 }
 277
 278
 279 /*
 280  computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
 281  that is, disp1buf(x, y)=d means that img1(x+roi.x, y+roi.y) ~ img2(x+roi.x-d, y+roi.y).
 282  minD <= d < maxD.
 283  disp2full is the reverse disparity map, that is:
 284  disp2full(x+roi.x,y+roi.y)=d means that img2(x+roi.x, y+roi.y) ~ img1(x+roi.x+d, y+roi.y)
 285
 286  note that disp1buf will have the same size as the roi and
 287  disp2full will have the same size as img1 (or img2).
 288  On exit disp2buf is not the final disparity, it is an intermediate result that becomes
 289  final after all the tiles are processed.
 290
 291  the disparity in disp1buf is written with sub-pixel accuracy
 292  (4 fractional bits, see CvStereoSGBM::DISP_SCALE),
 293  using quadratic interpolation, while the disparity in disp2buf
 294  is written as is, without interpolation.
 295
 296  disp2cost also has the same size as img1 (or img2).
 297  It contains the minimum current cost, used to find the best disparity, corresponding to the minimal cost.
 298  */
 299 static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
 300                                  Mat& disp1, const StereoSGBM& params,
 301                                  Mat& buffer )
 302 {
 303 #if CV_SSE2
 304     static const uchar LSBTab[] =
 305     {
 306         0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 307         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 308         6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 309         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 310         7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 311         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 312         6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
 313         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
 314     };
 315
 316     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
 317 #endif
 318
 319     const int ALIGN = 16;
 320     const int DISP_SHIFT = StereoSGBM::DISP_SHIFT;
 321     const int DISP_SCALE = StereoSGBM::DISP_SCALE;
 322     const CostType MAX_COST = SHRT_MAX;
 323
 324     int minD = params.minDisparity, maxD = minD + params.numberOfDisparities;
 325     Size SADWindowSize;
 326     SADWindowSize.width = SADWindowSize.height = params.SADWindowSize > 0 ? params.SADWindowSize : 5;
 327     int ftzero = max(params.preFilterCap, 15) | 1;
 328     int uniquenessRatio = params.uniquenessRatio > 0 ? params.uniquenessRatio : 10;
 329     int disp12MaxDiff = params.disp12MaxDiff > 0 ? params.disp12MaxDiff : 1;
 330     int P1 = params.P1 > 0 ? params.P1 : 2, P2 = max(params.P2 > 0 ? params.P2 : 5, P1+1);
 331     int k, width = disp1.cols, height = disp1.rows;
 332     int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
 333     int D = maxD - minD, width1 = maxX1 - minX1;
 334     int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
 335     int SW2 = SADWindowSize.width/2, SH2 = SADWindowSize.height/2;
 336     int npasses = params.fullDP ? 2 : 1;
 337     const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;
 338     PixType clipTab[TAB_SIZE];
 339
 340     for( k = 0; k < TAB_SIZE; k++ )
 341         clipTab[k] = (PixType)(min(max(k - TAB_OFS, -ftzero), ftzero) + ftzero);
 342
 343     if( minX1 >= maxX1 )
 344     {
 345         disp1 = Scalar::all(INVALID_DISP_SCALED);
 346         return;
 347     }
 348
 349     CV_Assert( D % 16 == 0 );
 350
 351     // NR - the number of directions. the loop on x below that computes Lr assumes that NR == 8.
 352     // if you change NR, please, modify the loop as well.
 353     int D2 = D+16, NRD2 = NR2*D2;
 354
 355     // the number of L_r(.,.) and min_k L_r(.,.) lines in the buffer:
 356     // for 8-way dynamic programming we need the current row and
 357     // the previous row, i.e. 2 rows in total
 358     const int NLR = 2;
 359     const int LrBorder = NLR - 1;
 360
 361     // for each possible stereo match (img1(x,y) <=> img2(x-d,y))
 362     // we keep pixel difference cost (C) and the summary cost over NR directions (S).
 363     // we also keep all the partial costs for the previous line L_r(x,d) and also min_k L_r(x, k)
 364     size_t costBufSize = width1*D;
 365     size_t CSBufSize = costBufSize*(params.fullDP ? height : 1);
 366     size_t minLrSize = (width1 + LrBorder*2)*NR2, LrSize = minLrSize*D2;
 367     int hsumBufNRows = SH2*2 + 2;
 368     size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
 369     costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
 370     CSBufSize*2*sizeof(CostType) + // C, S
 371     width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
 372     width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
 373
 374     if( !buffer.data || !buffer.isContinuous() ||
 375        buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )
 376         buffer.create(1, totalBufSize, CV_8U);
 377
 378     // summary cost over different (nDirs) directions
 379     CostType* Cbuf = (CostType*)alignPtr(buffer.data, ALIGN);
 380     CostType* Sbuf = Cbuf + CSBufSize;
 381     CostType* hsumBuf = Sbuf + CSBufSize;
 382     CostType* pixDiff = hsumBuf + costBufSize*hsumBufNRows;
 383
 384     CostType* disp2cost = pixDiff + costBufSize + (LrSize + minLrSize)*NLR;
 385     DispType* disp2ptr = (DispType*)(disp2cost + width);
 386     PixType* tempBuf = (PixType*)(disp2ptr + width);
 387
 388     // add P2 to every C(x,y). it saves a few operations in the inner loops
 389     for( k = 0; k < width1*D; k++ )
 390         Cbuf[k] = (CostType)P2;
 391
 392     for( int pass = 1; pass <= npasses; pass++ )
 393     {
 394         int x1, y1, x2, y2, dx, dy;
 395
 396         if( pass == 1 )
 397         {
 398             y1 = 0; y2 = height; dy = 1;
 399             x1 = 0; x2 = width1; dx = 1;
 400         }
 401         else
 402         {
 403             y1 = height-1; y2 = -1; dy = -1;
 404             x1 = width1-1; x2 = -1; dx = -1;
 405         }
 406
 407         CostType *Lr[NLR]={0}, *minLr[NLR]={0};
 408
 409         for( k = 0; k < NLR; k++ )
 410         {
 411             // shift Lr[k] and minLr[k] pointers, because we allocated them with the borders,
 412             // and will occasionally use negative indices with the arrays
 413             // we need to shift Lr[k] pointers by 1, to give the space for d=-1.
 414             // however, then the alignment will be imperfect, i.e. bad for SSE,
 415             // thus we shift the pointers by 8 (8*sizeof(short) == 16 - ideal alignment)
 416             Lr[k] = pixDiff + costBufSize + LrSize*k + NRD2*LrBorder + 8;
 417             memset( Lr[k] - LrBorder*NRD2 - 8, 0, LrSize*sizeof(CostType) );
 418             minLr[k] = pixDiff + costBufSize + LrSize*NLR + minLrSize*k + NR2*2;
 419             memset( minLr[k] - LrBorder*NR2, 0, minLrSize*sizeof(CostType) );
 420         }
 421
 422         for( int y = y1; y != y2; y += dy )
 423         {
 424             int x, d;
 425             DispType* disp1ptr = disp1.ptr<DispType>(y);
 426             CostType* C = Cbuf + (!params.fullDP ? 0 : y*costBufSize);
 427             CostType* S = Sbuf + (!params.fullDP ? 0 : y*costBufSize);
 428
 429             if( pass == 1 ) // compute C on the first pass, and reuse it on the second pass, if any.
 430             {
 431                 int dy1 = y == 0 ? 0 : y + SH2, dy2 = y == 0 ? SH2 : dy1;
 432
 433                 for( k = dy1; k <= dy2; k++ )
 434                 {
 435                     CostType* hsumAdd = hsumBuf + (min(k, height-1) % hsumBufNRows)*costBufSize;
 436
 437                     if( k < height )
 438                     {
 439                         calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
 440
 441                         memset(hsumAdd, 0, D*sizeof(CostType));
 442                         for( x = 0; x <= SW2*D; x += D )
 443                         {
 444                             int scale = x == 0 ? SW2 + 1 : 1;
 445                             for( d = 0; d < D; d++ )
 446                                 hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);
 447                         }
 448
 449                         if( y > 0 )
 450                         {
 451                             const CostType* hsumSub = hsumBuf + (max(y - SH2 - 1, 0) % hsumBufNRows)*costBufSize;
 452                             const CostType* Cprev = !params.fullDP || y == 0 ? C : C - costBufSize;
 453
 454                             for( x = D; x < width1*D; x += D )
 455                             {
 456                                 const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
 457                                 const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);
 458
 459                             #if CV_SSE2
 460                                 if( useSIMD )
 461                                 {
 462                                     for( d = 0; d < D; d += 8 )
 463                                     {
 464                                         __m128i hv = _mm_load_si128((const __m128i*)(hsumAdd + x - D + d));
 465                                         __m128i Cx = _mm_load_si128((__m128i*)(Cprev + x + d));
 466                                         hv = _mm_adds_epi16(_mm_subs_epi16(hv,
 467                                                                            _mm_load_si128((const __m128i*)(pixSub + d))),
 468                                                             _mm_load_si128((const __m128i*)(pixAdd + d)));
 469                                         Cx = _mm_adds_epi16(_mm_subs_epi16(Cx,
 470                                                                            _mm_load_si128((const __m128i*)(hsumSub + x + d))),
 471                                                             hv);
 472                                         _mm_store_si128((__m128i*)(hsumAdd + x + d), hv);
 473                                         _mm_store_si128((__m128i*)(C + x + d), Cx);
 474                                     }
 475                                 }
 476                                 else
 477                             #endif
 478                                 {
 479                                     for( d = 0; d < D; d++ )
 480                                     {
 481                                         int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
 482                                         C[x + d] = (CostType)(Cprev[x + d] + hv - hsumSub[x + d]);
 483                                     }
 484                                 }
 485                             }
 486                         }
 487                         else
 488                         {
 489                             for( x = D; x < width1*D; x += D )
 490                             {
 491                                 const CostType* pixAdd = pixDiff + min(x + SW2*D, (width1-1)*D);
 492                                 const CostType* pixSub = pixDiff + max(x - (SW2+1)*D, 0);
 493
 494                                 for( d = 0; d < D; d++ )
 495                                     hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);
 496                             }
 497                         }
 498                     }
 499
 500                     if( y == 0 )
 501                     {
 502                         int scale = k == 0 ? SH2 + 1 : 1;
 503                         for( x = 0; x < width1*D; x++ )
 504                             C[x] = (CostType)(C[x] + hsumAdd[x]*scale);
 505                     }
 506                 }
 507
 508                 // also, clear the S buffer
 509                 for( k = 0; k < width1*D; k++ )
 510                     S[k] = 0;
 511             }
 512
 513             // clear the left and the right borders
 514             memset( Lr[0] - NRD2*LrBorder - 8, 0, NRD2*LrBorder*sizeof(CostType) );
 515             memset( Lr[0] + width1*NRD2 - 8, 0, NRD2*LrBorder*sizeof(CostType) );
 516             memset( minLr[0] - NR2*LrBorder, 0, NR2*LrBorder*sizeof(CostType) );
 517             memset( minLr[0] + width1*NR2, 0, NR2*LrBorder*sizeof(CostType) );
 518
 519             /*
 520              [formula 13 in the paper]
 521              compute L_r(p, d) = C(p, d) +
 522              min(L_r(p-r, d),
 523              L_r(p-r, d-1) + P1,
 524              L_r(p-r, d+1) + P1,
 525              min_k L_r(p-r, k) + P2) - min_k L_r(p-r, k)
 526              where p = (x,y), r is one of the directions.
 527              we process all the directions at once:
 528              0: r=(-dx, 0)
 529              1: r=(-1, -dy)
 530              2: r=(0, -dy)
 531              3: r=(1, -dy)
 532              4: r=(-2, -dy)
 533              5: r=(-1, -dy*2)
 534              6: r=(1, -dy*2)
 535              7: r=(2, -dy)
 536              */
 537             for( x = x1; x != x2; x += dx )
 538             {
 539                 int xm = x*NR2, xd = xm*D2;
 540
 541                 int delta0 = minLr[0][xm - dx*NR2] + P2, delta1 = minLr[1][xm - NR2 + 1] + P2;
 542                 int delta2 = minLr[1][xm + 2] + P2, delta3 = minLr[1][xm + NR2 + 3] + P2;
 543
 544                 CostType* Lr_p0 = Lr[0] + xd - dx*NRD2;
 545                 CostType* Lr_p1 = Lr[1] + xd - NRD2 + D2;
 546                 CostType* Lr_p2 = Lr[1] + xd + D2*2;
 547                 CostType* Lr_p3 = Lr[1] + xd + NRD2 + D2*3;
 548
 549                 Lr_p0[-1] = Lr_p0[D] = Lr_p1[-1] = Lr_p1[D] =
 550                 Lr_p2[-1] = Lr_p2[D] = Lr_p3[-1] = Lr_p3[D] = MAX_COST;
 551
 552                 CostType* Lr_p = Lr[0] + xd;
 553                 const CostType* Cp = C + x*D;
 554                 CostType* Sp = S + x*D;
 555
 556             #if CV_SSE2
 557                 if( useSIMD )
 558                 {
 559                     __m128i _P1 = _mm_set1_epi16((short)P1);
 560
 561                     __m128i _delta0 = _mm_set1_epi16((short)delta0);
 562                     __m128i _delta1 = _mm_set1_epi16((short)delta1);
 563                     __m128i _delta2 = _mm_set1_epi16((short)delta2);
 564                     __m128i _delta3 = _mm_set1_epi16((short)delta3);
 565                     __m128i _minL0 = _mm_set1_epi16((short)MAX_COST);
 566
 567                     for( d = 0; d < D; d += 8 )
 568                     {
 569                         __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d));
 570                         __m128i L0, L1, L2, L3;
 571
 572                         L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
 573                         L1 = _mm_load_si128((const __m128i*)(Lr_p1 + d));
 574                         L2 = _mm_load_si128((const __m128i*)(Lr_p2 + d));
 575                         L3 = _mm_load_si128((const __m128i*)(Lr_p3 + d));
 576
 577                         L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
 578                         L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
 579
 580                         L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d - 1)), _P1));
 581                         L1 = _mm_min_epi16(L1, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p1 + d + 1)), _P1));
 582
 583                         L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d - 1)), _P1));
 584                         L2 = _mm_min_epi16(L2, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p2 + d + 1)), _P1));
 585
 586                         L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d - 1)), _P1));
 587                         L3 = _mm_min_epi16(L3, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p3 + d + 1)), _P1));
 588
 589                         L0 = _mm_min_epi16(L0, _delta0);
 590                         L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
 591
 592                         L1 = _mm_min_epi16(L1, _delta1);
 593                         L1 = _mm_adds_epi16(_mm_subs_epi16(L1, _delta1), Cpd);
 594
 595                         L2 = _mm_min_epi16(L2, _delta2);
 596                         L2 = _mm_adds_epi16(_mm_subs_epi16(L2, _delta2), Cpd);
 597
 598                         L3 = _mm_min_epi16(L3, _delta3);
 599                         L3 = _mm_adds_epi16(_mm_subs_epi16(L3, _delta3), Cpd);
 600
 601                         _mm_store_si128( (__m128i*)(Lr_p + d), L0);
 602                         _mm_store_si128( (__m128i*)(Lr_p + d + D2), L1);
 603                         _mm_store_si128( (__m128i*)(Lr_p + d + D2*2), L2);
 604                         _mm_store_si128( (__m128i*)(Lr_p + d + D2*3), L3);
 605
 606                         __m128i t0 = _mm_min_epi16(_mm_unpacklo_epi16(L0, L2), _mm_unpackhi_epi16(L0, L2));
 607                         __m128i t1 = _mm_min_epi16(_mm_unpacklo_epi16(L1, L3), _mm_unpackhi_epi16(L1, L3));
 608                         t0 = _mm_min_epi16(_mm_unpacklo_epi16(t0, t1), _mm_unpackhi_epi16(t0, t1));
 609                         _minL0 = _mm_min_epi16(_minL0, t0);
 610
 611                         __m128i Sval = _mm_load_si128((const __m128i*)(Sp + d));
 612
 613                         L0 = _mm_adds_epi16(L0, L1);
 614                         L2 = _mm_adds_epi16(L2, L3);
 615                         Sval = _mm_adds_epi16(Sval, L0);
 616                         Sval = _mm_adds_epi16(Sval, L2);
 617
 618                         _mm_store_si128((__m128i*)(Sp + d), Sval);
 619                     }
 620
 621                     _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
 622                     _mm_storel_epi64((__m128i*)&minLr[0][xm], _minL0);
 623                 }
 624                 else
 625             #endif
 626                 {
 627                     int minL0 = MAX_COST, minL1 = MAX_COST, minL2 = MAX_COST, minL3 = MAX_COST;
 628
 629                     for( d = 0; d < D; d++ )
 630                     {
 631                         int Cpd = Cp[d], L0, L1, L2, L3;
 632
 633                         L0 = Cpd + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, min(Lr_p0[d+1] + P1, delta0))) - delta0;
 634                         L1 = Cpd + min((int)Lr_p1[d], min(Lr_p1[d-1] + P1, min(Lr_p1[d+1] + P1, delta1))) - delta1;
 635                         L2 = Cpd + min((int)Lr_p2[d], min(Lr_p2[d-1] + P1, min(Lr_p2[d+1] + P1, delta2))) - delta2;
 636                         L3 = Cpd + min((int)Lr_p3[d], min(Lr_p3[d-1] + P1, min(Lr_p3[d+1] + P1, delta3))) - delta3;
 637
 638                         Lr_p[d] = (CostType)L0;
 639                         minL0 = min(minL0, L0);
 640
 641                         Lr_p[d + D2] = (CostType)L1;
 642                         minL1 = min(minL1, L1);
 643
 644                         Lr_p[d + D2*2] = (CostType)L2;
 645                         minL2 = min(minL2, L2);
 646
 647                         Lr_p[d + D2*3] = (CostType)L3;
 648                         minL3 = min(minL3, L3);
 649
 650                         Sp[d] = saturate_cast<CostType>(Sp[d] + L0 + L1 + L2 + L3);
 651                     }
 652                     minLr[0][xm] = (CostType)minL0;
 653                     minLr[0][xm+1] = (CostType)minL1;
 654                     minLr[0][xm+2] = (CostType)minL2;
 655                     minLr[0][xm+3] = (CostType)minL3;
 656                 }
 657             }
 658
 659             if( pass == npasses )
 660             {
 661                 for( x = 0; x < width; x++ )
 662                 {
 663                     disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
 664                     disp2cost[x] = MAX_COST;
 665                 }
 666
 667                 for( x = width1 - 1; x >= 0; x-- )
 668                 {
 669                     CostType* Sp = S + x*D;
 670                     int minS = MAX_COST, bestDisp = -1;
 671
 672                     if( npasses == 1 )
 673                     {
 674                         int xm = x*NR2, xd = xm*D2;
 675
 676                         int minL0 = MAX_COST;
 677                         int delta0 = minLr[0][xm + NR2] + P2;
 678                         CostType* Lr_p0 = Lr[0] + xd + NRD2;
 679                         Lr_p0[-1] = Lr_p0[D] = MAX_COST;
 680                         CostType* Lr_p = Lr[0] + xd;
 681
 682                         const CostType* Cp = C + x*D;
 683
 684                     #if CV_SSE2
 685                         if( useSIMD )
 686                         {
 687                             __m128i _P1 = _mm_set1_epi16((short)P1);
 688                             __m128i _delta0 = _mm_set1_epi16((short)delta0);
 689
 690                             __m128i _minL0 = _mm_set1_epi16((short)minL0);
 691                             __m128i _minS = _mm_set1_epi16(MAX_COST), _bestDisp = _mm_set1_epi16(-1);
 692                             __m128i _d8 = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7), _8 = _mm_set1_epi16(8);
 693
 694                             for( d = 0; d < D; d += 8 )
 695                             {
 696                                 __m128i Cpd = _mm_load_si128((const __m128i*)(Cp + d)), L0;
 697
 698                                 L0 = _mm_load_si128((const __m128i*)(Lr_p0 + d));
 699                                 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d - 1)), _P1));
 700                                 L0 = _mm_min_epi16(L0, _mm_adds_epi16(_mm_loadu_si128((const __m128i*)(Lr_p0 + d + 1)), _P1));
 701                                 L0 = _mm_min_epi16(L0, _delta0);
 702                                 L0 = _mm_adds_epi16(_mm_subs_epi16(L0, _delta0), Cpd);
 703
 704                                 _mm_store_si128((__m128i*)(Lr_p + d), L0);
 705                                 _minL0 = _mm_min_epi16(_minL0, L0);
 706                                 L0 = _mm_adds_epi16(L0, *(__m128i*)(Sp + d));
 707                                 _mm_store_si128((__m128i*)(Sp + d), L0);
 708
 709                                 __m128i mask = _mm_cmpgt_epi16(_minS, L0);
 710                                 _minS = _mm_min_epi16(_minS, L0);
 711                                 _bestDisp = _mm_xor_si128(_bestDisp, _mm_and_si128(_mm_xor_si128(_bestDisp,_d8), mask));
 712                                 _d8 = _mm_adds_epi16(_d8, _8);
 713                             }
 714
 715                             short CV_DECL_ALIGNED(16) bestDispBuf[8];
 716                             _mm_store_si128((__m128i*)bestDispBuf, _bestDisp);
 717
 718                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 8));
 719                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
 720                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
 721
 722                             __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
 723                             qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
 724                             qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
 725
 726                             minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
 727                             minS = (CostType)_mm_cvtsi128_si32(qS);
 728
 729                             qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
 730                             qS = _mm_cmpeq_epi16(_minS, qS);
 731                             int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
 732
 733                             bestDisp = bestDispBuf[LSBTab[idx]];
 734                         }
 735                         else
 736                     #endif
 737                         {
 738                             for( d = 0; d < D; d++ )
 739                             {
 740                                 int L0 = Cp[d] + min((int)Lr_p0[d], min(Lr_p0[d-1] + P1, min(Lr_p0[d+1] + P1, delta0))) - delta0;
 741
 742                                 Lr_p[d] = (CostType)L0;
 743                                 minL0 = min(minL0, L0);
 744
 745                                 int Sval = Sp[d] = saturate_cast<CostType>(Sp[d] + L0);
 746                                 if( Sval < minS )
 747                                 {
 748                                     minS = Sval;
 749                                     bestDisp = d;
 750                                 }
 751                             }
 752                             minLr[0][xm] = (CostType)minL0;
 753                         }
 754                     }
 755                     else
 756                     {
 757                         for( d = 0; d < D; d++ )
 758                         {
 759                             int Sval = Sp[d];
 760                             if( Sval < minS )
 761                             {
 762                                 minS = Sval;
 763                                 bestDisp = d;
 764                             }
 765                         }
 766                     }
 767
 768                     for( d = 0; d < D; d++ )
 769                     {
 770                         if( Sp[d]*(100 - uniquenessRatio) < minS*100 && std::abs(bestDisp - d) > 1 )
 771                             break;
 772                     }
 773                     if( d < D )
 774                         continue;
 775                     d = bestDisp;
 776                     int x2 = x + minX1 - d - minD;
 777                     if( disp2cost[x2] > minS )
 778                     {
 779                         disp2cost[x2] = (CostType)minS;
 780                         disp2ptr[x2] = (DispType)(d + minD);
 781                     }
 782
 783                     if( 0 < d && d < D-1 )
 784                     {
 785                         // do subpixel quadratic interpolation:
 786                         //   fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])
 787                         //   then find minimum of the parabola.
 788                         int denom2 = max(Sp[d-1] + Sp[d+1] - 2*Sp[d], 1);
 789                         d = d*DISP_SCALE + ((Sp[d-1] - Sp[d+1])*DISP_SCALE + denom2)/(denom2*2);
 790                     }
 791                     else
 792                         d *= DISP_SCALE;
 793                     disp1ptr[x + minX1] = (DispType)(d + minD*DISP_SCALE);
 794                 }
 795
 796                 for( x = minX1; x < maxX1; x++ )
 797                 {
 798                     // we round the computed disparity both towards -inf and +inf and check
 799                     // if either of the corresponding disparities in disp2 is consistent.
 800                     // This is to give the computed disparity a chance to look valid if it is.
 801                     int d = disp1ptr[x];
 802                     if( d == INVALID_DISP_SCALED )
 803                         continue;
 804                     int _d = d >> DISP_SHIFT;
 805                     int d_ = (d + DISP_SCALE-1) >> DISP_SHIFT;
 806                     int _x = x - _d, x_ = x - d_;
 807                     if( 0 <= _x && _x < width && disp2ptr[_x] >= minD && std::abs(disp2ptr[_x] - _d) > disp12MaxDiff &&
 808                        0 <= x_ && x_ < width && disp2ptr[x_] >= minD && std::abs(disp2ptr[x_] - d_) > disp12MaxDiff )
 809                         disp1ptr[x] = (DispType)INVALID_DISP_SCALED;
 810                 }
 811             }
 812
 813             // now shift the cyclic buffers
 814             std::swap( Lr[0], Lr[1] );
 815             std::swap( minLr[0], minLr[1] );
 816         }
 817     }
 818 }
 819
 820 typedef cv::Point_<short> Point2s;
 821
 822 void filterSpeckles( Mat& img, double _newval, int maxSpeckleSize, double _maxDiff, Mat& _buf )
 823 {
 824     CV_Assert( img.type() == CV_16SC1 );
 825
 826     int newVal = cvRound(_newval);
 827     int maxDiff = cvRound(_maxDiff);
 828     int width = img.cols, height = img.rows, npixels = width*height;
 829     size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));
 830     if( !_buf.isContinuous() || !_buf.data || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )
 831         _buf.create(1, bufSize, CV_8U);
 832
 833     uchar* buf = _buf.data;
 834     int i, j, dstep = img.step/sizeof(short);
 835     int* labels = (int*)buf;
 836     buf += npixels*sizeof(labels[0]);
 837     Point2s* wbuf = (Point2s*)buf;
 838     buf += npixels*sizeof(wbuf[0]);
 839     uchar* rtype = (uchar*)buf;
 840     int curlabel = 0;
 841
 842     // clear out label assignments
 843     memset(labels, 0, npixels*sizeof(labels[0]));
 844
 845     for( i = 0; i < height; i++ )
 846     {
 847         short* ds = img.ptr<short>(i);
 848         int* ls = labels + width*i;
 849
 850         for( j = 0; j < width; j++ )
 851         {
 852             if( ds[j] != newVal )       // not a bad disparity
 853             {
 854                 if( ls[j] )             // has a label, check for bad label
 855                 {
 856                     if( rtype[ls[j]] ) // small region, zero out disparity
 857                         ds[j] = (short)newVal;
 858                 }
 859                 // no label, assign and propagate
 860                 else
 861                 {
 862                     Point2s* ws = wbuf; // initialize wavefront
 863                     Point2s p((short)j, (short)i);      // current pixel
 864                     curlabel++; // next label
 865                     int count = 0;      // current region size
 866                     ls[j] = curlabel;
 867
 868                     // wavefront propagation
 869                     while( ws >= wbuf ) // wavefront not empty
 870                     {
 871                         count++;
 872                         // put neighbors onto wavefront
 873                         short* dpp = &img.at<short>(p.y, p.x);
 874                         short dp = *dpp;
 875                         int* lpp = labels + width*p.y + p.x;
 876
 877                         if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )
 878                         {
 879                             lpp[+1] = curlabel;
 880                             *ws++ = Point2s(p.x+1, p.y);
 881                         }
 882
 883                         if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )
 884                         {
 885                             lpp[-1] = curlabel;
 886                             *ws++ = Point2s(p.x-1, p.y);
 887                         }
 888
 889                         if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )
 890                         {
 891                             lpp[+width] = curlabel;
 892                             *ws++ = Point2s(p.x, p.y+1);
 893                         }
 894
 895                         if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )
 896                         {
 897                             lpp[-width] = curlabel;
 898                             *ws++ = Point2s(p.x, p.y-1);
 899                         }
 900
 901                         // pop most recent and propagate
 902                         // NB: could try least recent, maybe better convergence
 903                         p = *--ws;
 904                     }
 905
 906                     // assign label type
 907                     if( count <= maxSpeckleSize )       // speckle region
 908                     {
 909                         rtype[ls[j]] = 1;       // small region label
 910                         ds[j] = (short)newVal;
 911                     }
 912                     else
 913                         rtype[ls[j]] = 0;       // large region label
 914                 }
 915             }
 916         }
 917     }
 918 }
 919
 920
 921 void StereoSGBM::operator ()( const Mat& left, const Mat& right, Mat& disp )
 922 {
 923     CV_Assert( left.size() == right.size() && left.type() == right.type() &&
 924               left.depth() == DataType<PixType>::depth );
 925
 926     disp.create( left.size(), CV_16S );
 927
 928     computeDisparitySGBM( left, right, disp, *this, buffer );
 929     medianBlur(disp, disp, 3);
 930
 931     if( speckleWindowSize > 0 )
 932         filterSpeckles(disp, (minDisparity - 1)*DISP_SCALE, 100, DISP_SCALE, buffer);
 933 }
 934
 935
 936 Rect getValidDisparityROI( Rect roi1, Rect roi2,
 937                            int minDisparity,
 938                            int numberOfDisparities,
 939                            int SADWindowSize )
 940 {
 941     int SW2 = SADWindowSize/2;
 942     int minD = minDisparity, maxD = minDisparity + numberOfDisparities - 1;
 943
 944     int xmin = max(roi1.x, roi2.x + maxD) + SW2;
 945     int xmax = min(roi1.x + roi1.width, roi2.x + roi2.width - minD) - SW2;
 946     int ymin = max(roi1.y, roi2.y) + SW2;
 947     int ymax = min(roi1.y + roi1.height, roi2.y + roi2.height) - SW2;
 948
 949     Rect r(xmin, ymin, xmax - xmin, ymax - ymin);
 950
 951     return r.width > 0 && r.height > 0 ? r : Rect();
 952 }
 953
 954
 955 void validateDisparity( Mat& disp, const Mat& cost, int minDisparity, int numberOfDisparities, int disp12MaxDiff )
 956 {
 957     int cols = disp.cols, rows = disp.rows;
 958     int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
 959     int x, minX1 = max(maxD, 0), maxX1 = cols + min(minD, 0);
 960     AutoBuffer<int> _disp2buf(cols*2);
 961     int* disp2buf = _disp2buf;
 962     int* disp2cost = disp2buf + cols;
 963     const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
 964     int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;
 965     int costType = cost.type();
 966
 967     disp12MaxDiff *= DISP_SCALE;
 968
 969     CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
 970               (costType == CV_16S || costType == CV_32S) &&
 971               disp.size() == cost.size() );
 972
 973     for( int y = 0; y < rows; y++ )
 974     {
 975         short* dptr = disp.ptr<short>(y);
 976
 977         for( x = 0; x < cols; x++ )
 978         {
 979             disp2buf[x] = INVALID_DISP;
 980             disp2cost[x] = INT_MAX;
 981         }
 982
 983         if( costType == CV_16S )
 984         {
 985             const short* cptr = cost.ptr<short>(y);
 986
 987             for( x = minX1; x < maxX1; x++ )
 988             {
 989                 int d = dptr[x], c = cptr[x];
 990                 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
 991
 992                 if( disp2cost[x2] > c )
 993                 {
 994                     disp2cost[x2] = c;
 995                     disp2buf[x2] = d;
 996                 }
 997             }
 998         }
 999         else
1000         {
1001             const int* cptr = cost.ptr<int>(y);
1002
1003             for( x = minX1; x < maxX1; x++ )
1004             {
1005                 int d = dptr[x], c = cptr[x];
1006                 int x2 = x - ((d + DISP_SCALE/2) >> DISP_SHIFT);
1007
1008                 if( disp2cost[x2] < c )
1009                 {
1010                     disp2cost[x2] = c;
1011                     disp2buf[x2] = d;
1012                 }
1013             }
1014         }
1015
1016         for( x = minX1; x < maxX1; x++ )
1017         {
1018             // we round the computed disparity both towards -inf and +inf and check
1019             // if either of the corresponding disparities in disp2 is consistent.
1020             // This is to give the computed disparity a chance to look valid if it is.
1021             int d = dptr[x];
1022             if( d == INVALID_DISP_SCALED )
1023                 continue;
1024             int d0 = d >> DISP_SHIFT;
1025             int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
1026             int x0 = x - d0, x1 = x - d1;
1027             if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
1028                 (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
1029                 dptr[x] = (short)INVALID_DISP_SCALED;
1030         }
1031     }
1032 }
1033
1034 }
1035
1036 CvRect cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
1037                                int numberOfDisparities, int SADWindowSize )
1038 {
1039     return (CvRect)cv::getValidDisparityROI( roi1, roi2, minDisparity,
1040                                              numberOfDisparities, SADWindowSize );
1041 }
1042
1043 void cvValidateDisparity( CvArr* _disp, const CvArr* _cost, int minDisparity,
1044                           int numberOfDisparities, int disp12MaxDiff )
1045 {
1046     cv::Mat disp = cv::cvarrToMat(_disp), cost = cv::cvarrToMat(_cost);
1047     cv::validateDisparity( disp, cost, minDisparity, numberOfDisparities, disp12MaxDiff );
1048 }